Diff 477134

llvm/lib/Target/AMDGPU/AMDGPUGenRegisterBankInfo.def

Show All 28 Lines	enum PartialMappingIdx {
PM_VGPR64 = 18,		PM_VGPR64 = 18,
PM_VGPR128 = 19,		PM_VGPR128 = 19,
PM_VGPR256 = 20,		PM_VGPR256 = 20,
PM_VGPR512 = 21,		PM_VGPR512 = 21,
PM_VGPR1024 = 22,		PM_VGPR1024 = 22,
PM_SGPR96 = 23,		PM_SGPR96 = 23,
PM_VGPR96 = 24,		PM_VGPR96 = 24,
PM_AGPR96 = 25,		PM_AGPR96 = 25,
PM_AGPR32 = 31,		PM_SGPR288 = 26,
PM_AGPR64 = 32,		PM_VGPR288 = 27,
PM_AGPR128 = 33,		PM_AGPR288 = 28,
PM_AGPR256 = 34,		PM_SGPR320 = 29,
PM_AGPR512 = 35,		PM_VGPR320 = 30,
PM_AGPR1024 = 36		PM_AGPR320 = 31,
		PM_SGPR352 = 32,
		PM_VGPR352 = 33,
		PM_AGPR352 = 34,
		PM_SGPR384 = 35,
		PM_VGPR384 = 36,
		PM_AGPR384 = 37,
		PM_AGPR32 = 38,
		PM_AGPR64 = 39,
		PM_AGPR128 = 40,
		PM_AGPR256 = 41,
		PM_AGPR512 = 42,
		PM_AGPR1024 = 43
};		};

const RegisterBankInfo::PartialMapping PartMappings[] {		const RegisterBankInfo::PartialMapping PartMappings[] {
// StartIdx, Length, RegBank		// StartIdx, Length, RegBank
{0, 1, VCCRegBank},		{0, 1, VCCRegBank},

{0, 1, SGPRRegBank}, // SGPR begin		{0, 1, SGPRRegBank}, // SGPR begin
{0, 16, SGPRRegBank},		{0, 16, SGPRRegBank},
Show All 10 Lines	const RegisterBankInfo::PartialMapping PartMappings[] {
{0, 64, VGPRRegBank},		{0, 64, VGPRRegBank},
{0, 128, VGPRRegBank},		{0, 128, VGPRRegBank},
{0, 256, VGPRRegBank},		{0, 256, VGPRRegBank},
{0, 512, VGPRRegBank},		{0, 512, VGPRRegBank},
{0, 1024, VGPRRegBank},		{0, 1024, VGPRRegBank},
{0, 96, SGPRRegBank},		{0, 96, SGPRRegBank},
{0, 96, VGPRRegBank},		{0, 96, VGPRRegBank},
{0, 96, AGPRRegBank},		{0, 96, AGPRRegBank},
		{0, 288, SGPRRegBank},
		{0, 288, VGPRRegBank},
		{0, 288, AGPRRegBank},
		{0, 320, SGPRRegBank},
		{0, 320, VGPRRegBank},
		{0, 320, AGPRRegBank},
		{0, 352, SGPRRegBank},
		{0, 352, VGPRRegBank},
		{0, 352, AGPRRegBank},
		{0, 384, SGPRRegBank},
		{0, 384, VGPRRegBank},
		{0, 384, AGPRRegBank},

{0, 32, AGPRRegBank}, // AGPR begin		{0, 32, AGPRRegBank}, // AGPR begin
{0, 64, AGPRRegBank},		{0, 64, AGPRRegBank},
{0, 128, AGPRRegBank},		{0, 128, AGPRRegBank},
{0, 256, AGPRRegBank},		{0, 256, AGPRRegBank},
{0, 512, AGPRRegBank},		{0, 512, AGPRRegBank},
{0, 1024, AGPRRegBank}		{0, 1024, AGPRRegBank}
};		};
Show All 25 Lines	const RegisterBankInfo::ValueMapping ValMappings[] {
{&PartMappings[12], 1}, // 64		{&PartMappings[12], 1}, // 64
{&PartMappings[13], 1}, // 128		{&PartMappings[13], 1}, // 128
{&PartMappings[14], 1}, // 256		{&PartMappings[14], 1}, // 256
{&PartMappings[15], 1}, // 512		{&PartMappings[15], 1}, // 512
{&PartMappings[16], 1}, // 1024		{&PartMappings[16], 1}, // 1024
{&PartMappings[17], 1},		{&PartMappings[17], 1},
{&PartMappings[18], 1},		{&PartMappings[18], 1},
{&PartMappings[19], 1},		{&PartMappings[19], 1},
		{&PartMappings[20], 1},
		{&PartMappings[21], 1},
		{&PartMappings[22], 1},
		{&PartMappings[23], 1},
		{&PartMappings[24], 1},
		{&PartMappings[25], 1},
		{&PartMappings[26], 1},
		{&PartMappings[27], 1},
		{&PartMappings[28], 1},
		{&PartMappings[29], 1},
		{&PartMappings[30], 1},
		{&PartMappings[31], 1},

// AGPRs		// AGPRs
{nullptr, 0},		{nullptr, 0},
{nullptr, 0},		{nullptr, 0},
{nullptr, 0},		{nullptr, 0},
{nullptr, 0},		{nullptr, 0},
{nullptr, 0},		{nullptr, 0},
{&PartMappings[20], 1}, // 32		{&PartMappings[32], 1}, // 32
{&PartMappings[21], 1}, // 64		{&PartMappings[33], 1}, // 64
{&PartMappings[22], 1}, // 128		{&PartMappings[34], 1}, // 128
{&PartMappings[23], 1}, // 256		{&PartMappings[35], 1}, // 256
{&PartMappings[24], 1}, // 512		{&PartMappings[36], 1}, // 512
{&PartMappings[25], 1} // 1024		{&PartMappings[37], 1} // 1024
};		};

const RegisterBankInfo::PartialMapping SGPROnly64BreakDown[] {		const RegisterBankInfo::PartialMapping SGPROnly64BreakDown[] {
{0, 32, SGPRRegBank}, // 32-bit op		{0, 32, SGPRRegBank}, // 32-bit op
{0, 32, SGPRRegBank}, // 2x32-bit op		{0, 32, SGPRRegBank}, // 2x32-bit op
{32, 32, SGPRRegBank},		{32, 32, SGPRRegBank},
{0, 64, SGPRRegBank}, // <2x32-bit> op		{0, 64, SGPRRegBank}, // <2x32-bit> op

Show All 12 Lines	const RegisterBankInfo::ValueMapping ValMappingsSGPR64OnlyVGPR32[] {

/*32-bit vgpr*/ {&SGPROnly64BreakDown[4], 1},		/*32-bit vgpr*/ {&SGPROnly64BreakDown[4], 1},
/*2 x 32-bit vgpr*/ {&SGPROnly64BreakDown[5], 2}		/*2 x 32-bit vgpr*/ {&SGPROnly64BreakDown[5], 2}
};		};

enum ValueMappingIdx {		enum ValueMappingIdx {
SGPRStartIdx = 1,		SGPRStartIdx = 1,
VGPRStartIdx = 12,		VGPRStartIdx = 12,
AGPRStartIdx = 26		AGPRStartIdx = 38
};		};

const RegisterBankInfo::ValueMapping *getValueMapping(unsigned BankID,		const RegisterBankInfo::ValueMapping *getValueMapping(unsigned BankID,
unsigned Size) {		unsigned Size) {
unsigned Idx;		unsigned Idx;
switch (Size) {		switch (Size) {
case 1:		case 1:
if (BankID == AMDGPU::VCCRegBankID)		if (BankID == AMDGPU::VCCRegBankID)
Show All 10 Lines	switch (BankID) {
Idx = PM_SGPR96;		Idx = PM_SGPR96;
break;		break;
case AMDGPU::AGPRRegBankID:		case AMDGPU::AGPRRegBankID:
Idx = PM_AGPR96;		Idx = PM_AGPR96;
break;		break;
default: llvm_unreachable("Invalid register bank");		default: llvm_unreachable("Invalid register bank");
}		}
break;		break;
		case 288:
		switch (BankID) {
		case AMDGPU::VGPRRegBankID:
		Idx = PM_VGPR288;
		break;
		case AMDGPU::SGPRRegBankID:
		Idx = PM_SGPR288;
		break;
		case AMDGPU::AGPRRegBankID:
		Idx = PM_AGPR288;
		break;
		default: llvm_unreachable("Invalid register bank");
		}
		break;
		case 320:
		switch (BankID) {
		case AMDGPU::VGPRRegBankID:
		Idx = PM_VGPR320;
		break;
		case AMDGPU::SGPRRegBankID:
		Idx = PM_SGPR320;
		break;
		case AMDGPU::AGPRRegBankID:
		Idx = PM_AGPR320;
		break;
		default: llvm_unreachable("Invalid register bank");
		}
		break;
		case 352:
		switch (BankID) {
		case AMDGPU::VGPRRegBankID:
		Idx = PM_VGPR352;
		break;
		case AMDGPU::SGPRRegBankID:
		Idx = PM_SGPR352;
		break;
		case AMDGPU::AGPRRegBankID:
		Idx = PM_AGPR352;
		break;
		default: llvm_unreachable("Invalid register bank");
		}
		break;
		case 384:
		switch (BankID) {
		case AMDGPU::VGPRRegBankID:
		Idx = PM_VGPR384;
		break;
		case AMDGPU::SGPRRegBankID:
		Idx = PM_SGPR384;
		break;
		case AMDGPU::AGPRRegBankID:
		Idx = PM_AGPR384;
		break;
		default: llvm_unreachable("Invalid register bank");
		}
		break;
default:		default:
switch (BankID) {		switch (BankID) {
case AMDGPU::VGPRRegBankID:		case AMDGPU::VGPRRegBankID:
Idx = VGPRStartIdx;		Idx = VGPRStartIdx;
break;		break;
case AMDGPU::SGPRRegBankID:		case AMDGPU::SGPRRegBankID:
Idx = SGPRStartIdx;		Idx = SGPRStartIdx;
break;		break;
▲ Show 20 Lines • Show All 42 Lines • Show Last 20 Lines

llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp

Show First 20 Lines • Show All 78 Lines • ▼ Show 20 Lines	AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
AddPromotedToType(ISD::LOAD, MVT::v6f32, MVT::v6i32);		AddPromotedToType(ISD::LOAD, MVT::v6f32, MVT::v6i32);

setOperationAction(ISD::LOAD, MVT::v7f32, Promote);		setOperationAction(ISD::LOAD, MVT::v7f32, Promote);
AddPromotedToType(ISD::LOAD, MVT::v7f32, MVT::v7i32);		AddPromotedToType(ISD::LOAD, MVT::v7f32, MVT::v7i32);

setOperationAction(ISD::LOAD, MVT::v8f32, Promote);		setOperationAction(ISD::LOAD, MVT::v8f32, Promote);
AddPromotedToType(ISD::LOAD, MVT::v8f32, MVT::v8i32);		AddPromotedToType(ISD::LOAD, MVT::v8f32, MVT::v8i32);

		setOperationAction(ISD::LOAD, MVT::v9f32, Promote);
		AddPromotedToType(ISD::LOAD, MVT::v9f32, MVT::v9i32);

		setOperationAction(ISD::LOAD, MVT::v10f32, Promote);
		AddPromotedToType(ISD::LOAD, MVT::v10f32, MVT::v10i32);

		setOperationAction(ISD::LOAD, MVT::v11f32, Promote);
		AddPromotedToType(ISD::LOAD, MVT::v11f32, MVT::v11i32);

		setOperationAction(ISD::LOAD, MVT::v12f32, Promote);
		AddPromotedToType(ISD::LOAD, MVT::v12f32, MVT::v12i32);

setOperationAction(ISD::LOAD, MVT::v16f32, Promote);		setOperationAction(ISD::LOAD, MVT::v16f32, Promote);
AddPromotedToType(ISD::LOAD, MVT::v16f32, MVT::v16i32);		AddPromotedToType(ISD::LOAD, MVT::v16f32, MVT::v16i32);

setOperationAction(ISD::LOAD, MVT::v32f32, Promote);		setOperationAction(ISD::LOAD, MVT::v32f32, Promote);
AddPromotedToType(ISD::LOAD, MVT::v32f32, MVT::v32i32);		AddPromotedToType(ISD::LOAD, MVT::v32f32, MVT::v32i32);

setOperationAction(ISD::LOAD, MVT::i64, Promote);		setOperationAction(ISD::LOAD, MVT::i64, Promote);
AddPromotedToType(ISD::LOAD, MVT::i64, MVT::v2i32);		AddPromotedToType(ISD::LOAD, MVT::i64, MVT::v2i32);
▲ Show 20 Lines • Show All 96 Lines • ▼ Show 20 Lines	AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
AddPromotedToType(ISD::STORE, MVT::v6f32, MVT::v6i32);		AddPromotedToType(ISD::STORE, MVT::v6f32, MVT::v6i32);

setOperationAction(ISD::STORE, MVT::v7f32, Promote);		setOperationAction(ISD::STORE, MVT::v7f32, Promote);
AddPromotedToType(ISD::STORE, MVT::v7f32, MVT::v7i32);		AddPromotedToType(ISD::STORE, MVT::v7f32, MVT::v7i32);

setOperationAction(ISD::STORE, MVT::v8f32, Promote);		setOperationAction(ISD::STORE, MVT::v8f32, Promote);
AddPromotedToType(ISD::STORE, MVT::v8f32, MVT::v8i32);		AddPromotedToType(ISD::STORE, MVT::v8f32, MVT::v8i32);

		setOperationAction(ISD::STORE, MVT::v9f32, Promote);
		AddPromotedToType(ISD::STORE, MVT::v9f32, MVT::v9i32);

		setOperationAction(ISD::STORE, MVT::v10f32, Promote);
		AddPromotedToType(ISD::STORE, MVT::v10f32, MVT::v10i32);

		setOperationAction(ISD::STORE, MVT::v11f32, Promote);
		AddPromotedToType(ISD::STORE, MVT::v11f32, MVT::v11i32);

		setOperationAction(ISD::STORE, MVT::v12f32, Promote);
		AddPromotedToType(ISD::STORE, MVT::v12f32, MVT::v12i32);

setOperationAction(ISD::STORE, MVT::v16f32, Promote);		setOperationAction(ISD::STORE, MVT::v16f32, Promote);
AddPromotedToType(ISD::STORE, MVT::v16f32, MVT::v16i32);		AddPromotedToType(ISD::STORE, MVT::v16f32, MVT::v16i32);

setOperationAction(ISD::STORE, MVT::v32f32, Promote);		setOperationAction(ISD::STORE, MVT::v32f32, Promote);
AddPromotedToType(ISD::STORE, MVT::v32f32, MVT::v32i32);		AddPromotedToType(ISD::STORE, MVT::v32f32, MVT::v32i32);

setOperationAction(ISD::STORE, MVT::i64, Promote);		setOperationAction(ISD::STORE, MVT::i64, Promote);
AddPromotedToType(ISD::STORE, MVT::i64, MVT::v2i32);		AddPromotedToType(ISD::STORE, MVT::i64, MVT::v2i32);
▲ Show 20 Lines • Show All 98 Lines • ▼ Show 20 Lines	AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FNEARBYINT, {MVT::f16, MVT::f32, MVT::f64}, Custom);		setOperationAction(ISD::FNEARBYINT, {MVT::f16, MVT::f32, MVT::f64}, Custom);

setOperationAction(ISD::FREM, {MVT::f16, MVT::f32, MVT::f64}, Custom);		setOperationAction(ISD::FREM, {MVT::f16, MVT::f32, MVT::f64}, Custom);

// Expand to fneg + fadd.		// Expand to fneg + fadd.
setOperationAction(ISD::FSUB, MVT::f64, Expand);		setOperationAction(ISD::FSUB, MVT::f64, Expand);

setOperationAction(ISD::CONCAT_VECTORS,		setOperationAction(ISD::CONCAT_VECTORS,
{MVT::v3i32, MVT::v3f32, MVT::v4i32, MVT::v4f32,		{MVT::v3i32, MVT::v3f32, MVT::v4i32, MVT::v4f32,
MVT::v5i32, MVT::v5f32, MVT::v6i32, MVT::v6f32,		MVT::v5i32, MVT::v5f32, MVT::v6i32, MVT::v6f32,
MVT::v7i32, MVT::v7f32, MVT::v8i32, MVT::v8f32},		MVT::v7i32, MVT::v7f32, MVT::v8i32, MVT::v8f32,
		MVT::v9i32, MVT::v9f32, MVT::v10i32, MVT::v10f32,
		MVT::v11i32, MVT::v11f32, MVT::v12i32, MVT::v12f32},
Custom);		Custom);
setOperationAction(		setOperationAction(
ISD::EXTRACT_SUBVECTOR,		ISD::EXTRACT_SUBVECTOR,
{MVT::v2f16, MVT::v2i16, MVT::v4f16, MVT::v4i16, MVT::v2f32,		{MVT::v2f16, MVT::v2i16, MVT::v4f16, MVT::v4i16, MVT::v2f32,
MVT::v2i32, MVT::v3f32, MVT::v3i32, MVT::v4f32, MVT::v4i32,		MVT::v2i32, MVT::v3f32, MVT::v3i32, MVT::v4f32, MVT::v4i32,
MVT::v5f32, MVT::v5i32, MVT::v6f32, MVT::v6i32, MVT::v7f32,		MVT::v5f32, MVT::v5i32, MVT::v6f32, MVT::v6i32, MVT::v7f32,
MVT::v7i32, MVT::v8f32, MVT::v8i32, MVT::v16f16, MVT::v16i16,		MVT::v7i32, MVT::v8f32, MVT::v8i32, MVT::v9f32, MVT::v9i32,
MVT::v16f32, MVT::v16i32, MVT::v32f32, MVT::v32i32, MVT::v2f64,		MVT::v10i32, MVT::v10f32, MVT::v11i32, MVT::v11f32, MVT::v12i32,
MVT::v2i64, MVT::v3f64, MVT::v3i64, MVT::v4f64, MVT::v4i64,		MVT::v12f32, MVT::v16f16, MVT::v16i16, MVT::v16f32, MVT::v16i32,
MVT::v8f64, MVT::v8i64, MVT::v16f64, MVT::v16i64},		MVT::v32f32, MVT::v32i32, MVT::v2f64, MVT::v2i64, MVT::v3f64,
		MVT::v3i64, MVT::v4f64, MVT::v4i64, MVT::v8f64, MVT::v8i64,
		MVT::v16f64, MVT::v16i64},
Custom);		Custom);

setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);		setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
setOperationAction(ISD::FP_TO_FP16, {MVT::f64, MVT::f32}, Custom);		setOperationAction(ISD::FP_TO_FP16, {MVT::f64, MVT::f32}, Custom);

const MVT ScalarIntVTs[] = { MVT::i32, MVT::i64 };		const MVT ScalarIntVTs[] = { MVT::i32, MVT::i64 };
for (MVT VT : ScalarIntVTs) {		for (MVT VT : ScalarIntVTs) {
// These should use [SU]DIVREM, so set them to expand		// These should use [SU]DIVREM, so set them to expand
Show All 30 Lines	AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
setOperationAction({ISD::SMIN, ISD::UMIN, ISD::SMAX, ISD::UMAX}, MVT::i32,		setOperationAction({ISD::SMIN, ISD::UMIN, ISD::SMAX, ISD::UMAX}, MVT::i32,
Legal);		Legal);

setOperationAction(		setOperationAction(
{ISD::CTTZ, ISD::CTTZ_ZERO_UNDEF, ISD::CTLZ, ISD::CTLZ_ZERO_UNDEF},		{ISD::CTTZ, ISD::CTTZ_ZERO_UNDEF, ISD::CTLZ, ISD::CTLZ_ZERO_UNDEF},
MVT::i64, Custom);		MVT::i64, Custom);

static const MVT::SimpleValueType VectorIntTypes[] = {		static const MVT::SimpleValueType VectorIntTypes[] = {
MVT::v2i32, MVT::v3i32, MVT::v4i32, MVT::v5i32, MVT::v6i32, MVT::v7i32};		MVT::v2i32, MVT::v3i32, MVT::v4i32, MVT::v5i32, MVT::v6i32, MVT::v7i32,
		MVT::v9i32, MVT::v10i32, MVT::v11i32, MVT::v12i32};

for (MVT VT : VectorIntTypes) {		for (MVT VT : VectorIntTypes) {
// Expand the following operations for the current type by default.		// Expand the following operations for the current type by default.
setOperationAction({ISD::ADD, ISD::AND, ISD::FP_TO_SINT,		setOperationAction({ISD::ADD, ISD::AND, ISD::FP_TO_SINT,
ISD::FP_TO_UINT, ISD::MUL, ISD::MULHU,		ISD::FP_TO_UINT, ISD::MUL, ISD::MULHU,
ISD::MULHS, ISD::OR, ISD::SHL,		ISD::MULHS, ISD::OR, ISD::SHL,
ISD::SRA, ISD::SRL, ISD::ROTL,		ISD::SRA, ISD::SRL, ISD::ROTL,
ISD::ROTR, ISD::SUB, ISD::SINT_TO_FP,		ISD::ROTR, ISD::SUB, ISD::SINT_TO_FP,
ISD::UINT_TO_FP, ISD::SDIV, ISD::UDIV,		ISD::UINT_TO_FP, ISD::SDIV, ISD::UDIV,
ISD::SREM, ISD::UREM, ISD::SMUL_LOHI,		ISD::SREM, ISD::UREM, ISD::SMUL_LOHI,
ISD::UMUL_LOHI, ISD::SDIVREM, ISD::UDIVREM,		ISD::UMUL_LOHI, ISD::SDIVREM, ISD::UDIVREM,
ISD::SELECT, ISD::VSELECT, ISD::SELECT_CC,		ISD::SELECT, ISD::VSELECT, ISD::SELECT_CC,
ISD::XOR, ISD::BSWAP, ISD::CTPOP,		ISD::XOR, ISD::BSWAP, ISD::CTPOP,
ISD::CTTZ, ISD::CTLZ, ISD::VECTOR_SHUFFLE,		ISD::CTTZ, ISD::CTLZ, ISD::VECTOR_SHUFFLE,
ISD::SETCC},		ISD::SETCC},
VT, Expand);		VT, Expand);
}		}

static const MVT::SimpleValueType FloatVectorTypes[] = {		static const MVT::SimpleValueType FloatVectorTypes[] = {
MVT::v2f32, MVT::v3f32, MVT::v4f32, MVT::v5f32, MVT::v6f32, MVT::v7f32};		MVT::v2f32, MVT::v3f32, MVT::v4f32, MVT::v5f32, MVT::v6f32, MVT::v7f32,
		MVT::v9f32, MVT::v10f32, MVT::v11f32, MVT::v12f32};

for (MVT VT : FloatVectorTypes) {		for (MVT VT : FloatVectorTypes) {
setOperationAction(		setOperationAction(
{ISD::FABS, ISD::FMINNUM, ISD::FMAXNUM, ISD::FADD,		{ISD::FABS, ISD::FMINNUM, ISD::FMAXNUM, ISD::FADD,
ISD::FCEIL, ISD::FCOS, ISD::FDIV, ISD::FEXP2,		ISD::FCEIL, ISD::FCOS, ISD::FDIV, ISD::FEXP2,
ISD::FEXP, ISD::FLOG2, ISD::FREM, ISD::FLOG,		ISD::FEXP, ISD::FLOG2, ISD::FREM, ISD::FLOG,
ISD::FLOG10, ISD::FPOW, ISD::FFLOOR, ISD::FTRUNC,		ISD::FLOG10, ISD::FPOW, ISD::FFLOOR, ISD::FTRUNC,
ISD::FMUL, ISD::FMA, ISD::FRINT, ISD::FNEARBYINT,		ISD::FMUL, ISD::FMA, ISD::FRINT, ISD::FNEARBYINT,
Show All 19 Lines	AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
AddPromotedToType(ISD::SELECT, MVT::v5f32, MVT::v5i32);		AddPromotedToType(ISD::SELECT, MVT::v5f32, MVT::v5i32);

setOperationAction(ISD::SELECT, MVT::v6f32, Promote);		setOperationAction(ISD::SELECT, MVT::v6f32, Promote);
AddPromotedToType(ISD::SELECT, MVT::v6f32, MVT::v6i32);		AddPromotedToType(ISD::SELECT, MVT::v6f32, MVT::v6i32);

setOperationAction(ISD::SELECT, MVT::v7f32, Promote);		setOperationAction(ISD::SELECT, MVT::v7f32, Promote);
AddPromotedToType(ISD::SELECT, MVT::v7f32, MVT::v7i32);		AddPromotedToType(ISD::SELECT, MVT::v7f32, MVT::v7i32);

		setOperationAction(ISD::SELECT, MVT::v9f32, Promote);
		AddPromotedToType(ISD::SELECT, MVT::v9f32, MVT::v9i32);

		setOperationAction(ISD::SELECT, MVT::v10f32, Promote);
		AddPromotedToType(ISD::SELECT, MVT::v10f32, MVT::v10i32);

		setOperationAction(ISD::SELECT, MVT::v11f32, Promote);
		AddPromotedToType(ISD::SELECT, MVT::v11f32, MVT::v11i32);

		setOperationAction(ISD::SELECT, MVT::v12f32, Promote);
		AddPromotedToType(ISD::SELECT, MVT::v12f32, MVT::v12i32);

// There are no libcalls of any kind.		// There are no libcalls of any kind.
for (int I = 0; I < RTLIB::UNKNOWN_LIBCALL; ++I)		for (int I = 0; I < RTLIB::UNKNOWN_LIBCALL; ++I)
setLibcallName(static_cast<RTLIB::Libcall>(I), nullptr);		setLibcallName(static_cast<RTLIB::Libcall>(I), nullptr);

setSchedulingPreference(Sched::RegPressure);		setSchedulingPreference(Sched::RegPressure);
setJumpIsExpensive(true);		setJumpIsExpensive(true);

// FIXME: This is only partially true. If we have to do vector compares, any		// FIXME: This is only partially true. If we have to do vector compares, any
▲ Show 20 Lines • Show All 608 Lines • ▼ Show 20 Lines	for (unsigned Value = 0, NumValues = ValueVTs.size();

// Convert one element vectors to scalar.		// Convert one element vectors to scalar.
if (MemVT.isVector() && MemVT.getVectorNumElements() == 1)		if (MemVT.isVector() && MemVT.getVectorNumElements() == 1)
MemVT = MemVT.getScalarType();		MemVT = MemVT.getScalarType();

// Round up vec3/vec5 argument.		// Round up vec3/vec5 argument.
if (MemVT.isVector() && !MemVT.isPow2VectorType()) {		if (MemVT.isVector() && !MemVT.isPow2VectorType()) {
assert(MemVT.getVectorNumElements() == 3 ||		assert(MemVT.getVectorNumElements() == 3 ||
MemVT.getVectorNumElements() == 5);		MemVT.getVectorNumElements() == 5 ||
		(MemVT.getVectorNumElements() >= 9 &&
		MemVT.getVectorNumElements() <= 12));
MemVT = MemVT.getPow2VectorType(State.getContext());		MemVT = MemVT.getPow2VectorType(State.getContext());
} else if (!MemVT.isSimple() && !MemVT.isVector()) {		} else if (!MemVT.isSimple() && !MemVT.isVector()) {
MemVT = MemVT.getRoundIntegerType(State.getContext());		MemVT = MemVT.getRoundIntegerType(State.getContext());
}		}

unsigned PartOffset = 0;		unsigned PartOffset = 0;
for (unsigned i = 0; i != NumRegs; ++i) {		for (unsigned i = 0; i != NumRegs; ++i) {
State.addLoc(CCValAssign::getCustomMem(InIndex++, RegisterVT,		State.addLoc(CCValAssign::getCustomMem(InIndex++, RegisterVT,
▲ Show 20 Lines • Show All 3,814 Lines • Show Last 20 Lines

llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp

Show First 20 Lines • Show All 5,401 Lines • ▼ Show 20 Lines	if (UseNSA) {
Opcode = AMDGPU::getMIMGOpcode(BaseOpcodes[Is64][IsA16],		Opcode = AMDGPU::getMIMGOpcode(BaseOpcodes[Is64][IsA16],
IsGFX11Plus ? AMDGPU::MIMGEncGfx11NSA		IsGFX11Plus ? AMDGPU::MIMGEncGfx11NSA
: AMDGPU::MIMGEncGfx10NSA,		: AMDGPU::MIMGEncGfx10NSA,
NumVDataDwords, NumVAddrDwords);		NumVDataDwords, NumVAddrDwords);
} else {		} else {
Opcode = AMDGPU::getMIMGOpcode(		Opcode = AMDGPU::getMIMGOpcode(
BaseOpcodes[Is64][IsA16],		BaseOpcodes[Is64][IsA16],
IsGFX11Plus ? AMDGPU::MIMGEncGfx11Default : AMDGPU::MIMGEncGfx10Default,		IsGFX11Plus ? AMDGPU::MIMGEncGfx11Default : AMDGPU::MIMGEncGfx10Default,
NumVDataDwords, PowerOf2Ceil(NumVAddrDwords));		NumVDataDwords, NumVAddrDwords);
}		}
assert(Opcode != -1);		assert(Opcode != -1);

SmallVector<Register, 12> Ops;		SmallVector<Register, 12> Ops;
if (UseNSA && IsGFX11Plus) {		if (UseNSA && IsGFX11Plus) {
auto packLanes = [&Ops, &S32, &V3S32, &B](Register Src) {		auto packLanes = [&Ops, &S32, &V3S32, &B](Register Src) {
auto Unmerge = B.buildUnmerge({S32, S32, S32}, Src);		auto Unmerge = B.buildUnmerge({S32, S32, S32}, Src);
auto Merged = B.buildMerge(		auto Merged = B.buildMerge(
▲ Show 20 Lines • Show All 359 Lines • Show Last 20 Lines

llvm/lib/Target/AMDGPU/AMDGPURegisterBanks.td

	//=- AMDGPURegisterBank.td - Describe the AMDGPU Banks -------- tablegen --=//			//=- AMDGPURegisterBank.td - Describe the AMDGPU Banks -------- tablegen --=//
	//			//
	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.			// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
	// See https://llvm.org/LICENSE.txt for license information.			// See https://llvm.org/LICENSE.txt for license information.
	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception			// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
	//			//
	//===----------------------------------------------------------------------===//			//===----------------------------------------------------------------------===//

	def SGPRRegBank : RegisterBank<"SGPR",			def SGPRRegBank : RegisterBank<"SGPR",
	[SReg_LO16, SReg_32, SReg_64, SReg_96, SReg_128, SReg_160, SReg_192, SReg_224, SReg_256, SReg_512, SReg_1024]			[SReg_LO16, SReg_32, SReg_64, SReg_96, SReg_128, SReg_160, SReg_192, SReg_224, SReg_256, SReg_288, SReg_320, SReg_352, SReg_384, SReg_512, SReg_1024]
	>;			>;

	def VGPRRegBank : RegisterBank<"VGPR",			def VGPRRegBank : RegisterBank<"VGPR",
	[VGPR_LO16, VGPR_HI16, VGPR_32, VReg_64, VReg_96, VReg_128, VReg_160, VReg_192, VReg_224, VReg_256, VReg_512, VReg_1024]			[VGPR_LO16, VGPR_HI16, VGPR_32, VReg_64, VReg_96, VReg_128, VReg_160, VReg_192, VReg_224, VReg_256, VReg_288, VReg_320, VReg_352, VReg_384, VReg_512, VReg_1024]
	>;			>;

	// It is helpful to distinguish conditions from ordinary SGPRs.			// It is helpful to distinguish conditions from ordinary SGPRs.
	def VCCRegBank : RegisterBank <"VCC", [SReg_1]>;			def VCCRegBank : RegisterBank <"VCC", [SReg_1]>;

	def AGPRRegBank : RegisterBank <"AGPR",			def AGPRRegBank : RegisterBank <"AGPR",
	[AGPR_LO16, AGPR_32, AReg_64, AReg_96, AReg_128, AReg_160, AReg_192, AReg_224, AReg_256, AReg_512, AReg_1024]			[AGPR_LO16, AGPR_32, AReg_64, AReg_96, AReg_128, AReg_160, AReg_192, AReg_224, AReg_256, AReg_288, AReg_320, AReg_352, AReg_384, AReg_512, AReg_1024]
	>;			>;

llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp

Show First 20 Lines • Show All 421 Lines • ▼ Show 20 Lines	for (const MachineInstr &MI : MBB) {
Width = 8;		Width = 8;
} else if (AMDGPU::VReg_256RegClass.contains(Reg)) {		} else if (AMDGPU::VReg_256RegClass.contains(Reg)) {
IsSGPR = false;		IsSGPR = false;
Width = 8;		Width = 8;
} else if (AMDGPU::AReg_256RegClass.contains(Reg)) {		} else if (AMDGPU::AReg_256RegClass.contains(Reg)) {
IsSGPR = false;		IsSGPR = false;
IsAGPR = true;		IsAGPR = true;
Width = 8;		Width = 8;
		} else if (AMDGPU::VReg_288RegClass.contains(Reg)) {
		IsSGPR = false;
		Width = 9;
		} else if (AMDGPU::SReg_288RegClass.contains(Reg)) {
		IsSGPR = true;
		Width = 9;
		} else if (AMDGPU::AReg_288RegClass.contains(Reg)) {
		IsSGPR = false;
		IsAGPR = true;
		Width = 9;
		} else if (AMDGPU::VReg_320RegClass.contains(Reg)) {
		IsSGPR = false;
		Width = 10;
		} else if (AMDGPU::SReg_320RegClass.contains(Reg)) {
		IsSGPR = true;
		Width = 10;
		} else if (AMDGPU::AReg_320RegClass.contains(Reg)) {
		IsSGPR = false;
		IsAGPR = true;
		Width = 10;
		} else if (AMDGPU::VReg_352RegClass.contains(Reg)) {
		IsSGPR = false;
		Width = 11;
		} else if (AMDGPU::SReg_352RegClass.contains(Reg)) {
		IsSGPR = true;
		Width = 11;
		} else if (AMDGPU::AReg_352RegClass.contains(Reg)) {
		IsSGPR = false;
		IsAGPR = true;
		Width = 11;
		} else if (AMDGPU::VReg_384RegClass.contains(Reg)) {
		IsSGPR = false;
		Width = 12;
		} else if (AMDGPU::SReg_384RegClass.contains(Reg)) {
		IsSGPR = true;
		Width = 12;
		} else if (AMDGPU::AReg_384RegClass.contains(Reg)) {
		IsSGPR = false;
		IsAGPR = true;
		Width = 12;
} else if (AMDGPU::SReg_512RegClass.contains(Reg)) {		} else if (AMDGPU::SReg_512RegClass.contains(Reg)) {
assert(!AMDGPU::TTMP_512RegClass.contains(Reg) &&		assert(!AMDGPU::TTMP_512RegClass.contains(Reg) &&
"trap handler registers should not be used");		"trap handler registers should not be used");
IsSGPR = true;		IsSGPR = true;
Width = 16;		Width = 16;
} else if (AMDGPU::VReg_512RegClass.contains(Reg)) {		} else if (AMDGPU::VReg_512RegClass.contains(Reg)) {
IsSGPR = false;		IsSGPR = false;
Width = 16;		Width = 16;
▲ Show 20 Lines • Show All 133 Lines • Show Last 20 Lines

llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 2,377 Lines • ▼ Show 20 Lines	switch (RegWidth) {
case 160:		case 160:
return AMDGPU::VReg_160RegClassID;		return AMDGPU::VReg_160RegClassID;
case 192:		case 192:
return AMDGPU::VReg_192RegClassID;		return AMDGPU::VReg_192RegClassID;
case 224:		case 224:
return AMDGPU::VReg_224RegClassID;		return AMDGPU::VReg_224RegClassID;
case 256:		case 256:
return AMDGPU::VReg_256RegClassID;		return AMDGPU::VReg_256RegClassID;
		case 288:
		return AMDGPU::VReg_288RegClassID;
		case 320:
		return AMDGPU::VReg_320RegClassID;
		case 352:
		return AMDGPU::VReg_352RegClassID;
		case 384:
		return AMDGPU::VReg_384RegClassID;
case 512:		case 512:
return AMDGPU::VReg_512RegClassID;		return AMDGPU::VReg_512RegClassID;
case 1024:		case 1024:
return AMDGPU::VReg_1024RegClassID;		return AMDGPU::VReg_1024RegClassID;
}		}
} else if (Is == IS_TTMP) {		} else if (Is == IS_TTMP) {
switch (RegWidth) {		switch (RegWidth) {
default: return -1;		default: return -1;
Show All 22 Lines	switch (RegWidth) {
case 160:		case 160:
return AMDGPU::SGPR_160RegClassID;		return AMDGPU::SGPR_160RegClassID;
case 192:		case 192:
return AMDGPU::SGPR_192RegClassID;		return AMDGPU::SGPR_192RegClassID;
case 224:		case 224:
return AMDGPU::SGPR_224RegClassID;		return AMDGPU::SGPR_224RegClassID;
case 256:		case 256:
return AMDGPU::SGPR_256RegClassID;		return AMDGPU::SGPR_256RegClassID;
		case 288:
		return AMDGPU::SGPR_288RegClassID;
		case 320:
		return AMDGPU::SGPR_320RegClassID;
		case 352:
		return AMDGPU::SGPR_352RegClassID;
		case 384:
		return AMDGPU::SGPR_384RegClassID;
case 512:		case 512:
return AMDGPU::SGPR_512RegClassID;		return AMDGPU::SGPR_512RegClassID;
}		}
} else if (Is == IS_AGPR) {		} else if (Is == IS_AGPR) {
switch (RegWidth) {		switch (RegWidth) {
default: return -1;		default: return -1;
case 32:		case 32:
return AMDGPU::AGPR_32RegClassID;		return AMDGPU::AGPR_32RegClassID;
case 64:		case 64:
return AMDGPU::AReg_64RegClassID;		return AMDGPU::AReg_64RegClassID;
case 96:		case 96:
return AMDGPU::AReg_96RegClassID;		return AMDGPU::AReg_96RegClassID;
case 128:		case 128:
return AMDGPU::AReg_128RegClassID;		return AMDGPU::AReg_128RegClassID;
case 160:		case 160:
return AMDGPU::AReg_160RegClassID;		return AMDGPU::AReg_160RegClassID;
case 192:		case 192:
return AMDGPU::AReg_192RegClassID;		return AMDGPU::AReg_192RegClassID;
case 224:		case 224:
return AMDGPU::AReg_224RegClassID;		return AMDGPU::AReg_224RegClassID;
case 256:		case 256:
return AMDGPU::AReg_256RegClassID;		return AMDGPU::AReg_256RegClassID;
		case 288:
		return AMDGPU::AReg_288RegClassID;
		case 320:
		return AMDGPU::AReg_320RegClassID;
		case 352:
		return AMDGPU::AReg_352RegClassID;
		case 384:
		return AMDGPU::AReg_384RegClassID;
case 512:		case 512:
return AMDGPU::AReg_512RegClassID;		return AMDGPU::AReg_512RegClassID;
case 1024:		case 1024:
return AMDGPU::AReg_1024RegClassID;		return AMDGPU::AReg_1024RegClassID;
}		}
}		}
return -1;		return -1;
}		}
▲ Show 20 Lines • Show All 1,250 Lines • ▼ Show 20 Lines	unsigned ActualAddrSize =
IsNSA ? SrsrcIdx - VAddr0Idx		IsNSA ? SrsrcIdx - VAddr0Idx
: AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4;		: AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4;
bool IsA16 = (A16Idx != -1 && Inst.getOperand(A16Idx).getImm());		bool IsA16 = (A16Idx != -1 && Inst.getOperand(A16Idx).getImm());

unsigned ExpectedAddrSize =		unsigned ExpectedAddrSize =
AMDGPU::getAddrSizeMIMGOp(BaseOpcode, DimInfo, IsA16, hasG16());		AMDGPU::getAddrSizeMIMGOp(BaseOpcode, DimInfo, IsA16, hasG16());

if (!IsNSA) {		if (!IsNSA) {
if (ExpectedAddrSize > 8)		if (ExpectedAddrSize > 12)
ExpectedAddrSize = 16;		ExpectedAddrSize = 16;

// Allow oversized 8 VGPR vaddr when only 5/6/7 VGPRs are required.		// Allow oversized 8 VGPR vaddr when only 5/6/7 VGPRs are required.
// This provides backward compatibility for assembly created		// This provides backward compatibility for assembly created
// before 160b/192b/224b types were directly supported.		// before 160b/192b/224b types were directly supported.
if (ActualAddrSize == 8 && (ExpectedAddrSize >= 5 && ExpectedAddrSize <= 7))		if (ActualAddrSize == 8 && (ExpectedAddrSize >= 5 && ExpectedAddrSize <= 7))
return true;		return true;
}		}
▲ Show 20 Lines • Show All 5,621 Lines • Show Last 20 Lines

llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h

Show First 20 Lines • Show All 176 Lines • ▼ Show 20 Lines	public:
MCOperand decodeOperand_VSrc16(unsigned Val) const;		MCOperand decodeOperand_VSrc16(unsigned Val) const;
MCOperand decodeOperand_VSrcV216(unsigned Val) const;		MCOperand decodeOperand_VSrcV216(unsigned Val) const;
MCOperand decodeOperand_VSrcV232(unsigned Val) const;		MCOperand decodeOperand_VSrcV232(unsigned Val) const;

MCOperand decodeOperand_VReg_64(unsigned Val) const;		MCOperand decodeOperand_VReg_64(unsigned Val) const;
MCOperand decodeOperand_VReg_96(unsigned Val) const;		MCOperand decodeOperand_VReg_96(unsigned Val) const;
MCOperand decodeOperand_VReg_128(unsigned Val) const;		MCOperand decodeOperand_VReg_128(unsigned Val) const;
MCOperand decodeOperand_VReg_256(unsigned Val) const;		MCOperand decodeOperand_VReg_256(unsigned Val) const;
		MCOperand decodeOperand_VReg_288(unsigned Val) const;
		MCOperand decodeOperand_VReg_320(unsigned Val) const;
		MCOperand decodeOperand_VReg_352(unsigned Val) const;
		MCOperand decodeOperand_VReg_384(unsigned Val) const;
MCOperand decodeOperand_VReg_512(unsigned Val) const;		MCOperand decodeOperand_VReg_512(unsigned Val) const;
MCOperand decodeOperand_VReg_1024(unsigned Val) const;		MCOperand decodeOperand_VReg_1024(unsigned Val) const;

MCOperand decodeOperand_SReg_32(unsigned Val) const;		MCOperand decodeOperand_SReg_32(unsigned Val) const;
MCOperand decodeOperand_SReg_32_XM0_XEXEC(unsigned Val) const;		MCOperand decodeOperand_SReg_32_XM0_XEXEC(unsigned Val) const;
MCOperand decodeOperand_SReg_32_XEXEC_HI(unsigned Val) const;		MCOperand decodeOperand_SReg_32_XEXEC_HI(unsigned Val) const;
MCOperand decodeOperand_SRegOrLds_32(unsigned Val) const;		MCOperand decodeOperand_SRegOrLds_32(unsigned Val) const;
MCOperand decodeOperand_SReg_64(unsigned Val) const;		MCOperand decodeOperand_SReg_64(unsigned Val) const;
MCOperand decodeOperand_SReg_64_XEXEC(unsigned Val) const;		MCOperand decodeOperand_SReg_64_XEXEC(unsigned Val) const;
MCOperand decodeOperand_SReg_128(unsigned Val) const;		MCOperand decodeOperand_SReg_128(unsigned Val) const;
MCOperand decodeOperand_SReg_256(unsigned Val) const;		MCOperand decodeOperand_SReg_256(unsigned Val) const;
		MCOperand decodeOperand_SReg_288(unsigned Val) const;
		MCOperand decodeOperand_SReg_320(unsigned Val) const;
		MCOperand decodeOperand_SReg_352(unsigned Val) const;
		MCOperand decodeOperand_SReg_384(unsigned Val) const;
MCOperand decodeOperand_SReg_512(unsigned Val) const;		MCOperand decodeOperand_SReg_512(unsigned Val) const;

MCOperand decodeOperand_AGPR_32(unsigned Val) const;		MCOperand decodeOperand_AGPR_32(unsigned Val) const;
MCOperand decodeOperand_AReg_64(unsigned Val) const;		MCOperand decodeOperand_AReg_64(unsigned Val) const;
MCOperand decodeOperand_AReg_128(unsigned Val) const;		MCOperand decodeOperand_AReg_128(unsigned Val) const;
MCOperand decodeOperand_AReg_256(unsigned Val) const;		MCOperand decodeOperand_AReg_256(unsigned Val) const;
		MCOperand decodeOperand_AReg_288(unsigned Val) const;
		MCOperand decodeOperand_AReg_320(unsigned Val) const;
		MCOperand decodeOperand_AReg_352(unsigned Val) const;
		MCOperand decodeOperand_AReg_384(unsigned Val) const;
MCOperand decodeOperand_AReg_512(unsigned Val) const;		MCOperand decodeOperand_AReg_512(unsigned Val) const;
MCOperand decodeOperand_AReg_1024(unsigned Val) const;		MCOperand decodeOperand_AReg_1024(unsigned Val) const;
MCOperand decodeOperand_AV_32(unsigned Val) const;		MCOperand decodeOperand_AV_32(unsigned Val) const;
MCOperand decodeOperand_AV_64(unsigned Val) const;		MCOperand decodeOperand_AV_64(unsigned Val) const;
MCOperand decodeOperand_AV_128(unsigned Val) const;		MCOperand decodeOperand_AV_128(unsigned Val) const;
MCOperand decodeOperand_AVDst_128(unsigned Val) const;		MCOperand decodeOperand_AVDst_128(unsigned Val) const;
MCOperand decodeOperand_AVDst_512(unsigned Val) const;		MCOperand decodeOperand_AVDst_512(unsigned Val) const;

// Operand width selector. The getAgprClassId/getSgprClassId/getTtmpClassId
// helpers in AMDGPUDisassembler.cpp switch on it to pick a register class ID.
enum OpWidthTy {		enum OpWidthTy {
OPW32,		OPW32,
OPW64,		OPW64,
OPW96,		OPW96,
OPW128,		OPW128,
OPW160,		OPW160,
OPW256,		OPW256,
		// Added on the new side. The enumerators carry no explicit values, so
		// inserting these here shifts the numeric value of every later entry.
		OPW288,
		OPW320,
		OPW352,
		OPW384,
OPW512,		OPW512,
OPW1024,		OPW1024,
OPW16,		OPW16,
OPWV216,		OPWV216,
OPWV232,		OPWV232,
// Sentinel: the class-ID helpers assert OPW_FIRST_ <= Width < OPW_LAST_.
OPW_LAST_,		OPW_LAST_,
OPW_FIRST_ = OPW32		OPW_FIRST_ = OPW32
};		};
▲ Show 20 Lines • Show All 74 Lines • Show Last 20 Lines

llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp

Show First 20 Lines • Show All 123 Lines • ▼ Show 20 Lines
DECODE_OPERAND_REG(VS_32)		DECODE_OPERAND_REG(VS_32)
DECODE_OPERAND_REG(VS_64)		DECODE_OPERAND_REG(VS_64)
DECODE_OPERAND_REG(VS_128)		DECODE_OPERAND_REG(VS_128)

// DECODE_OPERAND_REG instantiations for the VReg classes.
// NOTE(review): the new side adds VReg_288, VReg_352 and VReg_384 but no
// VReg_320, although decodeOperand_VReg_320 is defined later in this file.
// Confirm the omission is intentional (e.g. no decoder table references it).
DECODE_OPERAND_REG(VReg_64)		DECODE_OPERAND_REG(VReg_64)
DECODE_OPERAND_REG(VReg_96)		DECODE_OPERAND_REG(VReg_96)
DECODE_OPERAND_REG(VReg_128)		DECODE_OPERAND_REG(VReg_128)
DECODE_OPERAND_REG(VReg_256)		DECODE_OPERAND_REG(VReg_256)
		DECODE_OPERAND_REG(VReg_288)
		DECODE_OPERAND_REG(VReg_352)
		DECODE_OPERAND_REG(VReg_384)
DECODE_OPERAND_REG(VReg_512)		DECODE_OPERAND_REG(VReg_512)
DECODE_OPERAND_REG(VReg_1024)		DECODE_OPERAND_REG(VReg_1024)

DECODE_OPERAND_REG(SReg_32)		DECODE_OPERAND_REG(SReg_32)
DECODE_OPERAND_REG(SReg_32_XM0_XEXEC)		DECODE_OPERAND_REG(SReg_32_XM0_XEXEC)
DECODE_OPERAND_REG(SReg_32_XEXEC_HI)		DECODE_OPERAND_REG(SReg_32_XEXEC_HI)
DECODE_OPERAND_REG(SRegOrLds_32)		DECODE_OPERAND_REG(SRegOrLds_32)
DECODE_OPERAND_REG(SReg_64)		DECODE_OPERAND_REG(SReg_64)
▲ Show 20 Lines • Show All 787 Lines • ▼ Show 20 Lines	if (isGFX10Plus()) {
const bool IsA16 = (A16Idx != -1 && MI.getOperand(A16Idx).getImm());		const bool IsA16 = (A16Idx != -1 && MI.getOperand(A16Idx).getImm());

AddrSize =		AddrSize =
AMDGPU::getAddrSizeMIMGOp(BaseOpcode, Dim, IsA16, AMDGPU::hasG16(STI));		AMDGPU::getAddrSizeMIMGOp(BaseOpcode, Dim, IsA16, AMDGPU::hasG16(STI));

IsNSA = Info->MIMGEncoding == AMDGPU::MIMGEncGfx10NSA \|\|		IsNSA = Info->MIMGEncoding == AMDGPU::MIMGEncGfx10NSA \|\|
Info->MIMGEncoding == AMDGPU::MIMGEncGfx11NSA;		Info->MIMGEncoding == AMDGPU::MIMGEncGfx11NSA;
if (!IsNSA) {		if (!IsNSA) {
if (AddrSize > 8)		if (AddrSize > 12)
AddrSize = 16;		AddrSize = 16;
} else {		} else {
if (AddrSize > Info->VAddrDwords) {		if (AddrSize > Info->VAddrDwords) {
// The NSA encoding does not contain enough operands for the combination		// The NSA encoding does not contain enough operands for the combination
// of base opcode / dimension. Should this be an error?		// of base opcode / dimension. Should this be an error?
return MCDisassembler::Success;		return MCDisassembler::Success;
}		}
}		}
▲ Show 20 Lines • Show All 193 Lines • ▼ Show 20 Lines	MCOperand AMDGPUDisassembler::createSRegOperand(unsigned SRegClassID,
case AMDGPU::SGPR_128RegClassID:		case AMDGPU::SGPR_128RegClassID:
case AMDGPU::TTMP_128RegClassID:		case AMDGPU::TTMP_128RegClassID:
// ToDo: unclear if s[100:104] is available on VI. Can we use VCC as SGPR in		// ToDo: unclear if s[100:104] is available on VI. Can we use VCC as SGPR in
// this bundle?		// this bundle?
case AMDGPU::SGPR_256RegClassID:		case AMDGPU::SGPR_256RegClassID:
case AMDGPU::TTMP_256RegClassID:		case AMDGPU::TTMP_256RegClassID:
// ToDo: unclear if s[96:104] is available on VI. Can we use VCC as SGPR in		// ToDo: unclear if s[96:104] is available on VI. Can we use VCC as SGPR in
// this bundle?		// this bundle?
		case AMDGPU::SGPR_288RegClassID:
		case AMDGPU::TTMP_288RegClassID:
		case AMDGPU::SGPR_320RegClassID:
		case AMDGPU::TTMP_320RegClassID:
		case AMDGPU::SGPR_352RegClassID:
		case AMDGPU::TTMP_352RegClassID:
		case AMDGPU::SGPR_384RegClassID:
		case AMDGPU::TTMP_384RegClassID:
case AMDGPU::SGPR_512RegClassID:		case AMDGPU::SGPR_512RegClassID:
case AMDGPU::TTMP_512RegClassID:		case AMDGPU::TTMP_512RegClassID:
shift = 2;		shift = 2;
break;		break;
// ToDo: unclear if s[88:104] is available on VI. Can we use VCC as SGPR in		// ToDo: unclear if s[88:104] is available on VI. Can we use VCC as SGPR in
// this bundle?		// this bundle?
default:		default:
llvm_unreachable("unhandled register class");		llvm_unreachable("unhandled register class");
▲ Show 20 Lines • Show All 59 Lines • ▼ Show 20 Lines
// Returns the AReg_128 register operand for Val; only the low 8 bits of Val
// are used (Val & 255).
MCOperand AMDGPUDisassembler::decodeOperand_AReg_128(unsigned Val) const {		MCOperand AMDGPUDisassembler::decodeOperand_AReg_128(unsigned Val) const {
return createRegOperand(AMDGPU::AReg_128RegClassID, Val & 255);		return createRegOperand(AMDGPU::AReg_128RegClassID, Val & 255);
}		}

// Returns the AReg_256 register operand for Val; only the low 8 bits of Val
// are used (Val & 255).
MCOperand AMDGPUDisassembler::decodeOperand_AReg_256(unsigned Val) const {		MCOperand AMDGPUDisassembler::decodeOperand_AReg_256(unsigned Val) const {
return createRegOperand(AMDGPU::AReg_256RegClassID, Val & 255);		return createRegOperand(AMDGPU::AReg_256RegClassID, Val & 255);
}		}

		// New in this diff. Returns the AReg_288 register operand for Val; only
		// the low 8 bits of Val are used (Val & 255), as in the sibling decoders.
		MCOperand AMDGPUDisassembler::decodeOperand_AReg_288(unsigned Val) const {
		return createRegOperand(AMDGPU::AReg_288RegClassID, Val & 255);
		}

		// New in this diff. Returns the AReg_320 register operand for Val; only
		// the low 8 bits of Val are used (Val & 255), as in the sibling decoders.
		MCOperand AMDGPUDisassembler::decodeOperand_AReg_320(unsigned Val) const {
		return createRegOperand(AMDGPU::AReg_320RegClassID, Val & 255);
		}

		// New in this diff. Returns the AReg_352 register operand for Val; only
		// the low 8 bits of Val are used (Val & 255), as in the sibling decoders.
		MCOperand AMDGPUDisassembler::decodeOperand_AReg_352(unsigned Val) const {
		return createRegOperand(AMDGPU::AReg_352RegClassID, Val & 255);
		}

		// New in this diff. Returns the AReg_384 register operand for Val; only
		// the low 8 bits of Val are used (Val & 255), as in the sibling decoders.
		MCOperand AMDGPUDisassembler::decodeOperand_AReg_384(unsigned Val) const {
		return createRegOperand(AMDGPU::AReg_384RegClassID, Val & 255);
		}


// Returns the AReg_512 register operand for Val; only the low 8 bits of Val
// are used (Val & 255).
MCOperand AMDGPUDisassembler::decodeOperand_AReg_512(unsigned Val) const {		MCOperand AMDGPUDisassembler::decodeOperand_AReg_512(unsigned Val) const {
return createRegOperand(AMDGPU::AReg_512RegClassID, Val & 255);		return createRegOperand(AMDGPU::AReg_512RegClassID, Val & 255);
}		}

// Returns the AReg_1024 register operand for Val; only the low 8 bits of Val
// are used (Val & 255).
MCOperand AMDGPUDisassembler::decodeOperand_AReg_1024(unsigned Val) const {		MCOperand AMDGPUDisassembler::decodeOperand_AReg_1024(unsigned Val) const {
return createRegOperand(AMDGPU::AReg_1024RegClassID, Val & 255);		return createRegOperand(AMDGPU::AReg_1024RegClassID, Val & 255);
}		}

Show All 32 Lines
// Returns the VReg_128 register operand for Val. Unlike the AReg decoders,
// Val is passed through without masking.
MCOperand AMDGPUDisassembler::decodeOperand_VReg_128(unsigned Val) const {		MCOperand AMDGPUDisassembler::decodeOperand_VReg_128(unsigned Val) const {
return createRegOperand(AMDGPU::VReg_128RegClassID, Val);		return createRegOperand(AMDGPU::VReg_128RegClassID, Val);
}		}

// Returns the VReg_256 register operand for Val. Unlike the AReg decoders,
// Val is passed through without masking.
MCOperand AMDGPUDisassembler::decodeOperand_VReg_256(unsigned Val) const {		MCOperand AMDGPUDisassembler::decodeOperand_VReg_256(unsigned Val) const {
return createRegOperand(AMDGPU::VReg_256RegClassID, Val);		return createRegOperand(AMDGPU::VReg_256RegClassID, Val);
}		}

		// New in this diff. Returns the VReg_288 register operand for Val; Val is
		// passed through without masking.
		MCOperand AMDGPUDisassembler::decodeOperand_VReg_288(unsigned Val) const {
		return createRegOperand(AMDGPU::VReg_288RegClassID, Val);
		}

		// New in this diff. Returns the VReg_320 register operand for Val; Val is
		// passed through without masking.
		// NOTE(review): the DECODE_OPERAND_REG list shown earlier in this diff has
		// no VReg_320 entry — confirm this decoder is reachable.
		MCOperand AMDGPUDisassembler::decodeOperand_VReg_320(unsigned Val) const {
		return createRegOperand(AMDGPU::VReg_320RegClassID, Val);
		}

		// New in this diff. Returns the VReg_352 register operand for Val; Val is
		// passed through without masking.
		MCOperand AMDGPUDisassembler::decodeOperand_VReg_352(unsigned Val) const {
		return createRegOperand(AMDGPU::VReg_352RegClassID, Val);
		}

		// New in this diff. Returns the VReg_384 register operand for Val; Val is
		// passed through without masking.
		MCOperand AMDGPUDisassembler::decodeOperand_VReg_384(unsigned Val) const {
		return createRegOperand(AMDGPU::VReg_384RegClassID, Val);
		}

// Returns the VReg_512 register operand for Val. Unlike the AReg decoders,
// Val is passed through without masking.
MCOperand AMDGPUDisassembler::decodeOperand_VReg_512(unsigned Val) const {		MCOperand AMDGPUDisassembler::decodeOperand_VReg_512(unsigned Val) const {
return createRegOperand(AMDGPU::VReg_512RegClassID, Val);		return createRegOperand(AMDGPU::VReg_512RegClassID, Val);
}		}

// Returns the VReg_1024 register operand for Val. Unlike the AReg decoders,
// Val is passed through without masking.
MCOperand AMDGPUDisassembler::decodeOperand_VReg_1024(unsigned Val) const {		MCOperand AMDGPUDisassembler::decodeOperand_VReg_1024(unsigned Val) const {
return createRegOperand(AMDGPU::VReg_1024RegClassID, Val);		return createRegOperand(AMDGPU::VReg_1024RegClassID, Val);
}		}

Show All 34 Lines
// Decodes Val as a 128-bit operand via decodeSrcOp. Note that the wider SReg
// decoders below go through decodeDstOp instead.
MCOperand AMDGPUDisassembler::decodeOperand_SReg_128(unsigned Val) const {		MCOperand AMDGPUDisassembler::decodeOperand_SReg_128(unsigned Val) const {
return decodeSrcOp(OPW128, Val);		return decodeSrcOp(OPW128, Val);
}		}

// Decodes Val as a 256-bit operand by delegating to decodeDstOp(OPW256, Val).
MCOperand AMDGPUDisassembler::decodeOperand_SReg_256(unsigned Val) const {		MCOperand AMDGPUDisassembler::decodeOperand_SReg_256(unsigned Val) const {
return decodeDstOp(OPW256, Val);		return decodeDstOp(OPW256, Val);
}		}

		// New in this diff. Decodes Val as a 288-bit operand by delegating to
		// decodeDstOp(OPW288, Val), mirroring the 256/512-bit decoders.
		MCOperand AMDGPUDisassembler::decodeOperand_SReg_288(unsigned Val) const {
		return decodeDstOp(OPW288, Val);
		}

		// New in this diff. Decodes Val as a 320-bit operand by delegating to
		// decodeDstOp(OPW320, Val), mirroring the 256/512-bit decoders.
		MCOperand AMDGPUDisassembler::decodeOperand_SReg_320(unsigned Val) const {
		return decodeDstOp(OPW320, Val);
		}

		// New in this diff. Decodes Val as a 352-bit operand by delegating to
		// decodeDstOp(OPW352, Val), mirroring the 256/512-bit decoders.
		MCOperand AMDGPUDisassembler::decodeOperand_SReg_352(unsigned Val) const {
		return decodeDstOp(OPW352, Val);
		}

		// New in this diff. Decodes Val as a 384-bit operand by delegating to
		// decodeDstOp(OPW384, Val), mirroring the 256/512-bit decoders.
		MCOperand AMDGPUDisassembler::decodeOperand_SReg_384(unsigned Val) const {
		return decodeDstOp(OPW384, Val);
		}

// Decodes Val as a 512-bit operand by delegating to decodeDstOp(OPW512, Val).
MCOperand AMDGPUDisassembler::decodeOperand_SReg_512(unsigned Val) const {		MCOperand AMDGPUDisassembler::decodeOperand_SReg_512(unsigned Val) const {
return decodeDstOp(OPW512, Val);		return decodeDstOp(OPW512, Val);
}		}

// Decode Literals for insts which always have a literal in the encoding		// Decode Literals for insts which always have a literal in the encoding
MCOperand		MCOperand
AMDGPUDisassembler::decodeMandatoryLiteralConstant(unsigned Val) const {		AMDGPUDisassembler::decodeMandatoryLiteralConstant(unsigned Val) const {
if (HasLiteral) {		if (HasLiteral) {
▲ Show 20 Lines • Show All 142 Lines • ▼ Show 20 Lines	unsigned AMDGPUDisassembler::getVgprClassId(const OpWidthTy Width) const {
case OPWV216:		case OPWV216:
return VGPR_32RegClassID;		return VGPR_32RegClassID;
case OPW64:		case OPW64:
case OPWV232: return VReg_64RegClassID;		case OPWV232: return VReg_64RegClassID;
case OPW96: return VReg_96RegClassID;		case OPW96: return VReg_96RegClassID;
case OPW128: return VReg_128RegClassID;		case OPW128: return VReg_128RegClassID;
case OPW160: return VReg_160RegClassID;		case OPW160: return VReg_160RegClassID;
case OPW256: return VReg_256RegClassID;		case OPW256: return VReg_256RegClassID;
		case OPW288: return VReg_288RegClassID;
		case OPW320: return VReg_320RegClassID;
		case OPW352: return VReg_352RegClassID;
		case OPW384: return VReg_384RegClassID;
case OPW512: return VReg_512RegClassID;		case OPW512: return VReg_512RegClassID;
case OPW1024: return VReg_1024RegClassID;		case OPW1024: return VReg_1024RegClassID;
}		}
}		}

// Maps an operand width to the matching AGPR/AReg register class ID.
// Width must lie in [OPW_FIRST_, OPW_LAST_) (asserted). The leading `default`
// label shares the OPW32/OPW16/OPWV216 return, so any width without its own
// case yields AGPR_32RegClassID.
unsigned AMDGPUDisassembler::getAgprClassId(const OpWidthTy Width) const {		unsigned AMDGPUDisassembler::getAgprClassId(const OpWidthTy Width) const {
using namespace AMDGPU;		using namespace AMDGPU;

assert(OPW_FIRST_ <= Width && Width < OPW_LAST_);		assert(OPW_FIRST_ <= Width && Width < OPW_LAST_);
switch (Width) {		switch (Width) {
default: // fall		default: // fall
case OPW32:		case OPW32:
case OPW16:		case OPW16:
case OPWV216:		case OPWV216:
return AGPR_32RegClassID;		return AGPR_32RegClassID;
case OPW64:		case OPW64:
case OPWV232: return AReg_64RegClassID;		case OPWV232: return AReg_64RegClassID;
case OPW96: return AReg_96RegClassID;		case OPW96: return AReg_96RegClassID;
case OPW128: return AReg_128RegClassID;		case OPW128: return AReg_128RegClassID;
case OPW160: return AReg_160RegClassID;		case OPW160: return AReg_160RegClassID;
case OPW256: return AReg_256RegClassID;		case OPW256: return AReg_256RegClassID;
		// New widths added by this diff.
		case OPW288: return AReg_288RegClassID;
		case OPW320: return AReg_320RegClassID;
		case OPW352: return AReg_352RegClassID;
		case OPW384: return AReg_384RegClassID;
case OPW512: return AReg_512RegClassID;		case OPW512: return AReg_512RegClassID;
case OPW1024: return AReg_1024RegClassID;		case OPW1024: return AReg_1024RegClassID;
}		}
}		}


// Maps an operand width to the matching SGPR register class ID.
// Width must lie in [OPW_FIRST_, OPW_LAST_) (asserted). There is no OPW1024
// case, so that width takes the leading `default` label and returns
// SGPR_32RegClassID along with OPW32/OPW16/OPWV216.
unsigned AMDGPUDisassembler::getSgprClassId(const OpWidthTy Width) const {		unsigned AMDGPUDisassembler::getSgprClassId(const OpWidthTy Width) const {
using namespace AMDGPU;		using namespace AMDGPU;

assert(OPW_FIRST_ <= Width && Width < OPW_LAST_);		assert(OPW_FIRST_ <= Width && Width < OPW_LAST_);
switch (Width) {		switch (Width) {
default: // fall		default: // fall
case OPW32:		case OPW32:
case OPW16:		case OPW16:
case OPWV216:		case OPWV216:
return SGPR_32RegClassID;		return SGPR_32RegClassID;
case OPW64:		case OPW64:
case OPWV232: return SGPR_64RegClassID;		case OPWV232: return SGPR_64RegClassID;
case OPW96: return SGPR_96RegClassID;		case OPW96: return SGPR_96RegClassID;
case OPW128: return SGPR_128RegClassID;		case OPW128: return SGPR_128RegClassID;
case OPW160: return SGPR_160RegClassID;		case OPW160: return SGPR_160RegClassID;
case OPW256: return SGPR_256RegClassID;		case OPW256: return SGPR_256RegClassID;
		// New widths added by this diff.
		case OPW288: return SGPR_288RegClassID;
		case OPW320: return SGPR_320RegClassID;
		case OPW352: return SGPR_352RegClassID;
		case OPW384: return SGPR_384RegClassID;
case OPW512: return SGPR_512RegClassID;		case OPW512: return SGPR_512RegClassID;
}		}
}		}

// Maps an operand width to the matching TTMP register class ID.
// Width must lie in [OPW_FIRST_, OPW_LAST_) (asserted). OPW96, OPW160 and
// OPW1024 have no case here, so they take the leading `default` label and
// return TTMP_32RegClassID along with OPW32/OPW16/OPWV216.
unsigned AMDGPUDisassembler::getTtmpClassId(const OpWidthTy Width) const {		unsigned AMDGPUDisassembler::getTtmpClassId(const OpWidthTy Width) const {
using namespace AMDGPU;		using namespace AMDGPU;

assert(OPW_FIRST_ <= Width && Width < OPW_LAST_);		assert(OPW_FIRST_ <= Width && Width < OPW_LAST_);
switch (Width) {		switch (Width) {
default: // fall		default: // fall
case OPW32:		case OPW32:
case OPW16:		case OPW16:
case OPWV216:		case OPWV216:
return TTMP_32RegClassID;		return TTMP_32RegClassID;
case OPW64:		case OPW64:
case OPWV232: return TTMP_64RegClassID;		case OPWV232: return TTMP_64RegClassID;
case OPW128: return TTMP_128RegClassID;		case OPW128: return TTMP_128RegClassID;
case OPW256: return TTMP_256RegClassID;		case OPW256: return TTMP_256RegClassID;
		// New widths added by this diff.
		case OPW288: return TTMP_288RegClassID;
		case OPW320: return TTMP_320RegClassID;
		case OPW352: return TTMP_352RegClassID;
		case OPW384: return TTMP_384RegClassID;
case OPW512: return TTMP_512RegClassID;		case OPW512: return TTMP_512RegClassID;
}		}
}		}

int AMDGPUDisassembler::getTTmpIdx(unsigned Val) const {		int AMDGPUDisassembler::getTTmpIdx(unsigned Val) const {
using namespace AMDGPU::EncValues;		using namespace AMDGPU::EncValues;

unsigned TTmpMin = isGFX9Plus() ? TTMP_GFX9PLUS_MIN : TTMP_VI_MIN;		unsigned TTmpMin = isGFX9Plus() ? TTMP_GFX9PLUS_MIN : TTMP_VI_MIN;
▲ Show 20 Lines • Show All 692 Lines • Show Last 20 Lines

llvm/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp

Show First 20 Lines • Show All 493 Lines • ▼ Show 20 Lines	void SIMCCodeEmitter::getAVOperandEncoding(const MCInst &MI, unsigned OpNo,
if (MRI.getRegClass(AMDGPU::AGPR_32RegClassID).contains(Reg) \|\|		if (MRI.getRegClass(AMDGPU::AGPR_32RegClassID).contains(Reg) \|\|
MRI.getRegClass(AMDGPU::AReg_64RegClassID).contains(Reg) \|\|		MRI.getRegClass(AMDGPU::AReg_64RegClassID).contains(Reg) \|\|
MRI.getRegClass(AMDGPU::AReg_96RegClassID).contains(Reg) \|\|		MRI.getRegClass(AMDGPU::AReg_96RegClassID).contains(Reg) \|\|
MRI.getRegClass(AMDGPU::AReg_128RegClassID).contains(Reg) \|\|		MRI.getRegClass(AMDGPU::AReg_128RegClassID).contains(Reg) \|\|
MRI.getRegClass(AMDGPU::AReg_160RegClassID).contains(Reg) \|\|		MRI.getRegClass(AMDGPU::AReg_160RegClassID).contains(Reg) \|\|
MRI.getRegClass(AMDGPU::AReg_192RegClassID).contains(Reg) \|\|		MRI.getRegClass(AMDGPU::AReg_192RegClassID).contains(Reg) \|\|
MRI.getRegClass(AMDGPU::AReg_224RegClassID).contains(Reg) \|\|		MRI.getRegClass(AMDGPU::AReg_224RegClassID).contains(Reg) \|\|
MRI.getRegClass(AMDGPU::AReg_256RegClassID).contains(Reg) \|\|		MRI.getRegClass(AMDGPU::AReg_256RegClassID).contains(Reg) \|\|
		MRI.getRegClass(AMDGPU::AReg_288RegClassID).contains(Reg) \|\|
		MRI.getRegClass(AMDGPU::AReg_320RegClassID).contains(Reg) \|\|
		MRI.getRegClass(AMDGPU::AReg_352RegClassID).contains(Reg) \|\|
		MRI.getRegClass(AMDGPU::AReg_384RegClassID).contains(Reg) \|\|
MRI.getRegClass(AMDGPU::AReg_512RegClassID).contains(Reg) \|\|		MRI.getRegClass(AMDGPU::AReg_512RegClassID).contains(Reg) \|\|
MRI.getRegClass(AMDGPU::AGPR_LO16RegClassID).contains(Reg))		MRI.getRegClass(AMDGPU::AGPR_LO16RegClassID).contains(Reg))
Enc \|= 512;		Enc \|= 512;

Op = Enc;		Op = Enc;
}		}

static bool needsPCRel(const MCExpr *Expr) {		static bool needsPCRel(const MCExpr *Expr) {
▲ Show 20 Lines • Show All 78 Lines • Show Last 20 Lines

llvm/lib/Target/AMDGPU/MIMGInstructions.td

Show First 20 Lines • Show All 952 Lines • ▼ Show 20 Lines	RegisterClass RegClass = !if(!le(NumWords, 0), ?,
!if(!eq(NumWords, 1), VGPR_32,		!if(!eq(NumWords, 1), VGPR_32,
!if(!eq(NumWords, 2), VReg_64,		!if(!eq(NumWords, 2), VReg_64,
!if(!eq(NumWords, 3), VReg_96,		!if(!eq(NumWords, 3), VReg_96,
!if(!eq(NumWords, 4), VReg_128,		!if(!eq(NumWords, 4), VReg_128,
!if(!eq(NumWords, 5), VReg_160,		!if(!eq(NumWords, 5), VReg_160,
!if(!eq(NumWords, 6), VReg_192,		!if(!eq(NumWords, 6), VReg_192,
!if(!eq(NumWords, 7), VReg_224,		!if(!eq(NumWords, 7), VReg_224,
!if(!le(NumWords, 8), VReg_256,		!if(!le(NumWords, 8), VReg_256,
!if(!le(NumWords, 16), VReg_512, ?))))))))));		!if(!le(NumWords, 9), VReg_288,
		!if(!le(NumWords, 10), VReg_320,
		!if(!le(NumWords, 11), VReg_352,
		!if(!le(NumWords, 12), VReg_384,
		!if(!le(NumWords, 16), VReg_512, ?))))))))))))));

// Whether the instruction variant with this vaddr size should be enabled for		// Whether the instruction variant with this vaddr size should be enabled for
// the auto-generated disassembler.		// the auto-generated disassembler.
bit Disassemble = enable_disasm;		bit Disassemble = enable_disasm;
}		}

// Return whether x is in lst.		// Return whether x is in lst.
class isIntInList<int x, list<int> lst> {		class isIntInList<int x, list<int> lst> {
Show All 32 Lines	class MIMG_Sampler_AddrSizes<AMDGPUSampleVariant sample, bit isG16> {
// Generate machine instructions based on possible register classes for the		// Generate machine instructions based on possible register classes for the
// required numbers of address words. The disassembler defaults to the		// required numbers of address words. The disassembler defaults to the
// smallest register class.		// smallest register class.
list<MIMGAddrSize> MachineInstrs =		list<MIMGAddrSize> MachineInstrs =
!foldl([]<MIMGAddrSize>,		!foldl([]<MIMGAddrSize>,
!foreach(range,		!foreach(range,
// V4 is generated for V3 and V4		// V4 is generated for V3 and V4
// V8 is generated for V5 through V8		// V8 is generated for V5 through V8
// V16 is generated for V9 through V16		// V16 is generated for V13 through V16
[[1],[2],[3],[3,4],[5],[6],[7],[5,8],[9,16]],		[[1],[2],[3],[3,4],[5],[6],[7],[5,8],[9],[10],[11],[12],[13,16]],
MIMGAddrSizes_dw_range<range>),		MIMGAddrSizes_dw_range<range>),
lhs, dw,		lhs, dw,
!if(isRangeInList<dw.Min, dw.Max, AllNumAddrWords>.ret,		!if(isRangeInList<dw.Min, dw.Max, AllNumAddrWords>.ret,
!listconcat(lhs, [MIMGAddrSize<dw.Max, !empty(lhs)>]),		!listconcat(lhs, [MIMGAddrSize<dw.Max, !empty(lhs)>]),
lhs));		lhs));

// For NSA, generate machine instructions for all possible numbers of words		// For NSA, generate machine instructions for all possible numbers of words
// except 1 (which is already covered by the non-NSA case).		// except 1 (which is already covered by the non-NSA case).
▲ Show 20 Lines • Show All 522 Lines • Show Last 20 Lines

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 114 Lines • ▼ Show 20 Lines	SITargetLowering::SITargetLowering(const TargetMachine &TM,
addRegisterClass(MVT::v7f32, TRI->getVGPRClassForBitWidth(224));		addRegisterClass(MVT::v7f32, TRI->getVGPRClassForBitWidth(224));

addRegisterClass(MVT::v8i32, &AMDGPU::SGPR_256RegClass);		addRegisterClass(MVT::v8i32, &AMDGPU::SGPR_256RegClass);
addRegisterClass(MVT::v8f32, TRI->getVGPRClassForBitWidth(256));		addRegisterClass(MVT::v8f32, TRI->getVGPRClassForBitWidth(256));

addRegisterClass(MVT::v4i64, &AMDGPU::SGPR_256RegClass);		addRegisterClass(MVT::v4i64, &AMDGPU::SGPR_256RegClass);
addRegisterClass(MVT::v4f64, TRI->getVGPRClassForBitWidth(256));		addRegisterClass(MVT::v4f64, TRI->getVGPRClassForBitWidth(256));

		addRegisterClass(MVT::v9i32, &AMDGPU::SGPR_288RegClass);
		addRegisterClass(MVT::v9f32, TRI->getVGPRClassForBitWidth(288));

		addRegisterClass(MVT::v10i32, &AMDGPU::SGPR_320RegClass);
		addRegisterClass(MVT::v10f32, TRI->getVGPRClassForBitWidth(320));

		addRegisterClass(MVT::v11i32, &AMDGPU::SGPR_352RegClass);
		addRegisterClass(MVT::v11f32, TRI->getVGPRClassForBitWidth(352));

		addRegisterClass(MVT::v12i32, &AMDGPU::SGPR_384RegClass);
		addRegisterClass(MVT::v12f32, TRI->getVGPRClassForBitWidth(384));

addRegisterClass(MVT::v16i32, &AMDGPU::SGPR_512RegClass);		addRegisterClass(MVT::v16i32, &AMDGPU::SGPR_512RegClass);
addRegisterClass(MVT::v16f32, TRI->getVGPRClassForBitWidth(512));		addRegisterClass(MVT::v16f32, TRI->getVGPRClassForBitWidth(512));

addRegisterClass(MVT::v8i64, &AMDGPU::SGPR_512RegClass);		addRegisterClass(MVT::v8i64, &AMDGPU::SGPR_512RegClass);
addRegisterClass(MVT::v8f64, TRI->getVGPRClassForBitWidth(512));		addRegisterClass(MVT::v8f64, TRI->getVGPRClassForBitWidth(512));

addRegisterClass(MVT::v16i64, &AMDGPU::SGPR_1024RegClass);		addRegisterClass(MVT::v16i64, &AMDGPU::SGPR_1024RegClass);
addRegisterClass(MVT::v16f64, TRI->getVGPRClassForBitWidth(1024));		addRegisterClass(MVT::v16f64, TRI->getVGPRClassForBitWidth(1024));
Show All 22 Lines	SITargetLowering::SITargetLowering(const TargetMachine &TM,
// really produce a 1-bit result. Any copy/extend from these will turn into a		// really produce a 1-bit result. Any copy/extend from these will turn into a
// select, and zext/1 or sext/-1 are equally cheap. Arbitrarily choose 0/1, as		// select, and zext/1 or sext/-1 are equally cheap. Arbitrarily choose 0/1, as
// it's what most targets use.		// it's what most targets use.
setBooleanContents(ZeroOrOneBooleanContent);		setBooleanContents(ZeroOrOneBooleanContent);
setBooleanVectorContents(ZeroOrOneBooleanContent);		setBooleanVectorContents(ZeroOrOneBooleanContent);

// We need to custom lower vector stores from local memory		// We need to custom lower vector stores from local memory
setOperationAction(ISD::LOAD,		setOperationAction(ISD::LOAD,
{MVT::v2i32, MVT::v3i32, MVT::v4i32, MVT::v5i32,		{MVT::v2i32, MVT::v3i32, MVT::v4i32, MVT::v5i32,
MVT::v6i32, MVT::v7i32, MVT::v8i32, MVT::v16i32, MVT::i1,		MVT::v6i32, MVT::v7i32, MVT::v8i32, MVT::v9i32,
MVT::v32i32},		MVT::v10i32, MVT::v11i32, MVT::v12i32, MVT::v16i32,
		MVT::i1, MVT::v32i32},
Custom);		Custom);

setOperationAction(ISD::STORE,		setOperationAction(ISD::STORE,
{MVT::v2i32, MVT::v3i32, MVT::v4i32, MVT::v5i32,		{MVT::v2i32, MVT::v3i32, MVT::v4i32, MVT::v5i32,
MVT::v6i32, MVT::v7i32, MVT::v8i32, MVT::v16i32, MVT::i1,		MVT::v6i32, MVT::v7i32, MVT::v8i32, MVT::v9i32,
MVT::v32i32},		MVT::v10i32, MVT::v11i32, MVT::v12i32, MVT::v16i32,
		MVT::i1, MVT::v32i32},
Custom);		Custom);

setTruncStoreAction(MVT::v2i32, MVT::v2i16, Expand);		setTruncStoreAction(MVT::v2i32, MVT::v2i16, Expand);
setTruncStoreAction(MVT::v3i32, MVT::v3i16, Expand);		setTruncStoreAction(MVT::v3i32, MVT::v3i16, Expand);
setTruncStoreAction(MVT::v4i32, MVT::v4i16, Expand);		setTruncStoreAction(MVT::v4i32, MVT::v4i16, Expand);
setTruncStoreAction(MVT::v8i32, MVT::v8i16, Expand);		setTruncStoreAction(MVT::v8i32, MVT::v8i16, Expand);
setTruncStoreAction(MVT::v16i32, MVT::v16i16, Expand);		setTruncStoreAction(MVT::v16i32, MVT::v16i16, Expand);
setTruncStoreAction(MVT::v32i32, MVT::v32i16, Expand);		setTruncStoreAction(MVT::v32i32, MVT::v32i16, Expand);
Show All 26 Lines	SITargetLowering::SITargetLowering(const TargetMachine &TM,
setOperationAction(ISD::SELECT_CC,		setOperationAction(ISD::SELECT_CC,
{MVT::f32, MVT::i32, MVT::i64, MVT::f64, MVT::i1}, Expand);		{MVT::f32, MVT::i32, MVT::i64, MVT::f64, MVT::i1}, Expand);

setOperationAction(ISD::SETCC, MVT::i1, Promote);		setOperationAction(ISD::SETCC, MVT::i1, Promote);
setOperationAction(ISD::SETCC, {MVT::v2i1, MVT::v4i1}, Expand);		setOperationAction(ISD::SETCC, {MVT::v2i1, MVT::v4i1}, Expand);
AddPromotedToType(ISD::SETCC, MVT::i1, MVT::i32);		AddPromotedToType(ISD::SETCC, MVT::i1, MVT::i32);

setOperationAction(ISD::TRUNCATE,		setOperationAction(ISD::TRUNCATE,
{MVT::v2i32, MVT::v3i32, MVT::v4i32, MVT::v5i32,		{MVT::v2i32, MVT::v3i32, MVT::v4i32, MVT::v5i32,
MVT::v6i32, MVT::v7i32, MVT::v8i32, MVT::v16i32},		MVT::v6i32, MVT::v7i32, MVT::v8i32, MVT::v9i32,
		MVT::v10i32, MVT::v11i32, MVT::v12i32, MVT::v16i32},
Expand);		Expand);
setOperationAction(ISD::FP_ROUND,		setOperationAction(ISD::FP_ROUND,
{MVT::v2f32, MVT::v3f32, MVT::v4f32, MVT::v5f32,		{MVT::v2f32, MVT::v3f32, MVT::v4f32, MVT::v5f32,
MVT::v6f32, MVT::v7f32, MVT::v8f32, MVT::v16f32},		MVT::v6f32, MVT::v7f32, MVT::v8f32, MVT::v9f32,
		MVT::v10f32, MVT::v11f32, MVT::v12f32, MVT::v16f32},
Expand);		Expand);

setOperationAction(ISD::SIGN_EXTEND_INREG,		setOperationAction(ISD::SIGN_EXTEND_INREG,
{MVT::v2i1, MVT::v4i1, MVT::v2i8, MVT::v4i8, MVT::v2i16,		{MVT::v2i1, MVT::v4i1, MVT::v2i8, MVT::v4i8, MVT::v2i16,
MVT::v3i16, MVT::v4i16, MVT::Other},		MVT::v3i16, MVT::v4i16, MVT::Other},
Custom);		Custom);

setOperationAction(ISD::BRCOND, MVT::Other, Custom);		setOperationAction(ISD::BRCOND, MVT::Other, Custom);
Show All 9 Lines

#if 0		#if 0
setOperationAction({ISD::ADDCARRY, ISD::SUBCARRY}, MVT::i64, Legal);		setOperationAction({ISD::ADDCARRY, ISD::SUBCARRY}, MVT::i64, Legal);
#endif		#endif

// We only support LOAD/STORE and vector manipulation ops for vectors		// We only support LOAD/STORE and vector manipulation ops for vectors
// with > 4 elements.		// with > 4 elements.
for (MVT VT :		for (MVT VT :
{MVT::v8i32, MVT::v8f32, MVT::v16i32, MVT::v16f32, MVT::v2i64,		{MVT::v8i32, MVT::v8f32, MVT::v9i32, MVT::v9f32, MVT::v10i32,
MVT::v2f64, MVT::v4i16, MVT::v4f16, MVT::v3i64, MVT::v3f64,		MVT::v10f32, MVT::v11i32, MVT::v11f32, MVT::v12i32, MVT::v12f32,
MVT::v6i32, MVT::v6f32, MVT::v4i64, MVT::v4f64, MVT::v8i64,		MVT::v16i32, MVT::v16f32, MVT::v2i64, MVT::v2f64, MVT::v4i16,
MVT::v8f64, MVT::v8i16, MVT::v8f16, MVT::v16i16, MVT::v16f16,		MVT::v4f16, MVT::v3i64, MVT::v3f64, MVT::v6i32, MVT::v6f32,
MVT::v16i64, MVT::v16f64, MVT::v32i32, MVT::v32f32}) {		MVT::v4i64, MVT::v4f64, MVT::v8i64, MVT::v8f64, MVT::v8i16,
		MVT::v8f16, MVT::v16i16, MVT::v16f16, MVT::v16i64, MVT::v16f64,
		MVT::v32i32, MVT::v32f32}) {
for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op) {		for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op) {
switch (Op) {		switch (Op) {
case ISD::LOAD:		case ISD::LOAD:
case ISD::STORE:		case ISD::STORE:
case ISD::BUILD_VECTOR:		case ISD::BUILD_VECTOR:
case ISD::BITCAST:		case ISD::BITCAST:
case ISD::UNDEF:		case ISD::UNDEF:
case ISD::EXTRACT_VECTOR_ELT:		case ISD::EXTRACT_VECTOR_ELT:
▲ Show 20 Lines • Show All 103 Lines • ▼ Show 20 Lines	setOperationAction({ISD::EXTRACT_VECTOR_ELT, ISD::INSERT_VECTOR_ELT},
Custom);		Custom);

// Deal with vec3 vector operations when widened to vec4.		// Deal with vec3 vector operations when widened to vec4.
setOperationAction(ISD::INSERT_SUBVECTOR,		setOperationAction(ISD::INSERT_SUBVECTOR,
{MVT::v3i32, MVT::v3f32, MVT::v4i32, MVT::v4f32}, Custom);		{MVT::v3i32, MVT::v3f32, MVT::v4i32, MVT::v4f32}, Custom);

// Deal with vec5/6/7 vector operations when widened to vec8.		// Deal with vec5/6/7 vector operations when widened to vec8; the list below also covers vec9 through vec12.
setOperationAction(ISD::INSERT_SUBVECTOR,		setOperationAction(ISD::INSERT_SUBVECTOR,
{MVT::v5i32, MVT::v5f32, MVT::v6i32, MVT::v6f32,		{MVT::v5i32, MVT::v5f32, MVT::v6i32, MVT::v6f32,
MVT::v7i32, MVT::v7f32, MVT::v8i32, MVT::v8f32},		MVT::v7i32, MVT::v7f32, MVT::v8i32, MVT::v8f32,
		MVT::v9i32, MVT::v9f32, MVT::v10i32, MVT::v10f32,
		MVT::v11i32, MVT::v11f32, MVT::v12i32, MVT::v12f32},
Custom);		Custom);

// BUFFER/FLAT_ATOMIC_CMP_SWAP on GCN GPUs needs input marshalling,		// BUFFER/FLAT_ATOMIC_CMP_SWAP on GCN GPUs needs input marshalling,
// and output demarshalling		// and output demarshalling
setOperationAction(ISD::ATOMIC_CMP_SWAP, {MVT::i32, MVT::i64}, Custom);		setOperationAction(ISD::ATOMIC_CMP_SWAP, {MVT::i32, MVT::i64}, Custom);

// We can't return success/failure, only the old value,		// We can't return success/failure, only the old value,
// let LLVM add the comparison		// let LLVM add the comparison
▲ Show 20 Lines • Show All 3,846 Lines • ▼ Show 20 Lines	BuildMI(*BB, MI, DL, TII->get(AMDGPU::S_MOV_B32))
.addImm(MFI->getLDSSize());		.addImm(MFI->getLDSSize());
MI.eraseFromParent();		MI.eraseFromParent();
return BB;		return BB;
}		}
case AMDGPU::SI_INDIRECT_SRC_V1:		case AMDGPU::SI_INDIRECT_SRC_V1:
case AMDGPU::SI_INDIRECT_SRC_V2:		case AMDGPU::SI_INDIRECT_SRC_V2:
case AMDGPU::SI_INDIRECT_SRC_V4:		case AMDGPU::SI_INDIRECT_SRC_V4:
case AMDGPU::SI_INDIRECT_SRC_V8:		case AMDGPU::SI_INDIRECT_SRC_V8:
		case AMDGPU::SI_INDIRECT_SRC_V9:
		case AMDGPU::SI_INDIRECT_SRC_V10:
		case AMDGPU::SI_INDIRECT_SRC_V11:
		case AMDGPU::SI_INDIRECT_SRC_V12:
case AMDGPU::SI_INDIRECT_SRC_V16:		case AMDGPU::SI_INDIRECT_SRC_V16:
case AMDGPU::SI_INDIRECT_SRC_V32:		case AMDGPU::SI_INDIRECT_SRC_V32:
return emitIndirectSrc(MI, BB, getSubtarget());		return emitIndirectSrc(MI, BB, getSubtarget());
case AMDGPU::SI_INDIRECT_DST_V1:		case AMDGPU::SI_INDIRECT_DST_V1:
case AMDGPU::SI_INDIRECT_DST_V2:		case AMDGPU::SI_INDIRECT_DST_V2:
case AMDGPU::SI_INDIRECT_DST_V4:		case AMDGPU::SI_INDIRECT_DST_V4:
case AMDGPU::SI_INDIRECT_DST_V8:		case AMDGPU::SI_INDIRECT_DST_V8:
		case AMDGPU::SI_INDIRECT_DST_V9:
		case AMDGPU::SI_INDIRECT_DST_V10:
		case AMDGPU::SI_INDIRECT_DST_V11:
		case AMDGPU::SI_INDIRECT_DST_V12:
case AMDGPU::SI_INDIRECT_DST_V16:		case AMDGPU::SI_INDIRECT_DST_V16:
case AMDGPU::SI_INDIRECT_DST_V32:		case AMDGPU::SI_INDIRECT_DST_V32:
return emitIndirectDst(MI, BB, getSubtarget());		return emitIndirectDst(MI, BB, getSubtarget());
case AMDGPU::SI_KILL_F32_COND_IMM_PSEUDO:		case AMDGPU::SI_KILL_F32_COND_IMM_PSEUDO:
case AMDGPU::SI_KILL_I1_PSEUDO:		case AMDGPU::SI_KILL_I1_PSEUDO:
return splitKillBlock(MI, BB);		return splitKillBlock(MI, BB);
case AMDGPU::V_CNDMASK_B64_PSEUDO: {		case AMDGPU::V_CNDMASK_B64_PSEUDO: {
MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();		MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
▲ Show 20 Lines • Show All 1,917 Lines • ▼ Show 20 Lines
}		}

static SDValue getBuildDwordsVector(SelectionDAG &DAG, SDLoc DL,		static SDValue getBuildDwordsVector(SelectionDAG &DAG, SDLoc DL,
ArrayRef<SDValue> Elts) {		ArrayRef<SDValue> Elts) {
assert(!Elts.empty());		assert(!Elts.empty());
MVT Type;		MVT Type;
unsigned NumElts = Elts.size();		unsigned NumElts = Elts.size();

if (NumElts <= 8) {		if (NumElts <= 12) {
Type = MVT::getVectorVT(MVT::f32, NumElts);		Type = MVT::getVectorVT(MVT::f32, NumElts);
} else {		} else {
assert(Elts.size() <= 16);		assert(Elts.size() <= 16);
Type = MVT::v16f32;		Type = MVT::v16f32;
NumElts = 16;		NumElts = 16;
}		}

SmallVector<SDValue, 16> VecElts(NumElts);		SmallVector<SDValue, 16> VecElts(NumElts);
▲ Show 20 Lines • Show All 1,533 Lines • ▼ Show 20 Lines	if (UseNSA) {
IsGFX11Plus ? AMDGPU::MIMGEncGfx11NSA		IsGFX11Plus ? AMDGPU::MIMGEncGfx11NSA
: AMDGPU::MIMGEncGfx10NSA,		: AMDGPU::MIMGEncGfx10NSA,
NumVDataDwords, NumVAddrDwords);		NumVDataDwords, NumVAddrDwords);
} else {		} else {
Opcode =		Opcode =
AMDGPU::getMIMGOpcode(BaseOpcodes[Is64][IsA16],		AMDGPU::getMIMGOpcode(BaseOpcodes[Is64][IsA16],
IsGFX11Plus ? AMDGPU::MIMGEncGfx11Default		IsGFX11Plus ? AMDGPU::MIMGEncGfx11Default
: AMDGPU::MIMGEncGfx10Default,		: AMDGPU::MIMGEncGfx10Default,
NumVDataDwords, PowerOf2Ceil(NumVAddrDwords));		NumVDataDwords, NumVAddrDwords);
}		}
assert(Opcode != -1);		assert(Opcode != -1);

SmallVector<SDValue, 16> Ops;		SmallVector<SDValue, 16> Ops;

auto packLanes = [&DAG, &Ops, &DL] (SDValue Op, bool IsAligned) {		auto packLanes = [&DAG, &Ops, &DL] (SDValue Op, bool IsAligned) {
SmallVector<SDValue, 3> Lanes;		SmallVector<SDValue, 3> Lanes;
DAG.ExtractVectorElements(Op, Lanes, 0, 3);		DAG.ExtractVectorElements(Op, Lanes, 0, 3);
▲ Show 20 Lines • Show All 49 Lines • ▼ Show 20 Lines	if (UseNSA && IsGFX11Plus) {
Ops.push_back(DAG.getBitcast(MVT::i32, RayExtent));		Ops.push_back(DAG.getBitcast(MVT::i32, RayExtent));
packLanes(RayOrigin, true);		packLanes(RayOrigin, true);
packLanes(RayDir, true);		packLanes(RayDir, true);
packLanes(RayInvDir, false);		packLanes(RayInvDir, false);
}		}

if (!UseNSA) {		if (!UseNSA) {
// Build a single vector containing all the operands so far prepared.		// Build a single vector containing all the operands so far prepared.
if (NumVAddrDwords > 8) {		if (NumVAddrDwords > 12) {
SDValue Undef = DAG.getUNDEF(MVT::i32);		SDValue Undef = DAG.getUNDEF(MVT::i32);
Ops.append(16 - Ops.size(), Undef);		Ops.append(16 - Ops.size(), Undef);
}		}
assert(Ops.size() == 8 || Ops.size() == 16);		assert(Ops.size() >= 8 && Ops.size() <= 12);
SDValue MergedOps = DAG.getBuildVector(		SDValue MergedOps = DAG.getBuildVector(
Ops.size() == 16 ? MVT::v16i32 : MVT::v8i32, DL, Ops);		MVT::getVectorVT(MVT::i32, Ops.size()), DL, Ops);
Ops.clear();		Ops.clear();
Ops.push_back(MergedOps);		Ops.push_back(MergedOps);
}		}

Ops.push_back(TDescr);		Ops.push_back(TDescr);
if (IsA16)		if (IsA16)
Ops.push_back(DAG.getTargetConstant(1, DL, MVT::i1));		Ops.push_back(DAG.getTargetConstant(1, DL, MVT::i1));
Ops.push_back(M->getChain());		Ops.push_back(M->getChain());
▲ Show 20 Lines • Show All 4,623 Lines • ▼ Show 20 Lines	static int getAlignedAGPRClassID(unsigned UnalignedClassID) {
case AMDGPU::VReg_160RegClassID:		case AMDGPU::VReg_160RegClassID:
return AMDGPU::VReg_160_Align2RegClassID;		return AMDGPU::VReg_160_Align2RegClassID;
case AMDGPU::VReg_192RegClassID:		case AMDGPU::VReg_192RegClassID:
return AMDGPU::VReg_192_Align2RegClassID;		return AMDGPU::VReg_192_Align2RegClassID;
case AMDGPU::VReg_224RegClassID:		case AMDGPU::VReg_224RegClassID:
return AMDGPU::VReg_224_Align2RegClassID;		return AMDGPU::VReg_224_Align2RegClassID;
case AMDGPU::VReg_256RegClassID:		case AMDGPU::VReg_256RegClassID:
return AMDGPU::VReg_256_Align2RegClassID;		return AMDGPU::VReg_256_Align2RegClassID;
		case AMDGPU::VReg_288RegClassID:
		return AMDGPU::VReg_288_Align2RegClassID;
		case AMDGPU::VReg_320RegClassID:
		return AMDGPU::VReg_320_Align2RegClassID;
		case AMDGPU::VReg_352RegClassID:
		return AMDGPU::VReg_352_Align2RegClassID;
		case AMDGPU::VReg_384RegClassID:
		return AMDGPU::VReg_384_Align2RegClassID;
case AMDGPU::VReg_512RegClassID:		case AMDGPU::VReg_512RegClassID:
return AMDGPU::VReg_512_Align2RegClassID;		return AMDGPU::VReg_512_Align2RegClassID;
case AMDGPU::VReg_1024RegClassID:		case AMDGPU::VReg_1024RegClassID:
return AMDGPU::VReg_1024_Align2RegClassID;		return AMDGPU::VReg_1024_Align2RegClassID;
case AMDGPU::AReg_64RegClassID:		case AMDGPU::AReg_64RegClassID:
return AMDGPU::AReg_64_Align2RegClassID;		return AMDGPU::AReg_64_Align2RegClassID;
case AMDGPU::AReg_96RegClassID:		case AMDGPU::AReg_96RegClassID:
return AMDGPU::AReg_96_Align2RegClassID;		return AMDGPU::AReg_96_Align2RegClassID;
▲ Show 20 Lines • Show All 782 Lines • Show Last 20 Lines

llvm/lib/Target/AMDGPU/SIInstrInfo.cpp

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 1,294 Lines • ▼ Show 20 Lines	if (IsIndirectSrc) {
if (VecSize <= 96) // 12 bytes		if (VecSize <= 96) // 12 bytes
return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V3);		return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V3);
if (VecSize <= 128) // 16 bytes		if (VecSize <= 128) // 16 bytes
return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V4);		return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V4);
if (VecSize <= 160) // 20 bytes		if (VecSize <= 160) // 20 bytes
return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V5);		return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V5);
if (VecSize <= 256) // 32 bytes		if (VecSize <= 256) // 32 bytes
return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V8);		return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V8);
		if (VecSize <= 288) // 36 bytes
		return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V9);
		if (VecSize <= 320) // 40 bytes
		return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V10);
		if (VecSize <= 352) // 44 bytes
		return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V11);
		if (VecSize <= 384) // 48 bytes
		return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V12);
if (VecSize <= 512) // 64 bytes		if (VecSize <= 512) // 64 bytes
return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V16);		return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V16);
if (VecSize <= 1024) // 128 bytes		if (VecSize <= 1024) // 128 bytes
return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V32);		return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V32);

llvm_unreachable("unsupported size for IndirectRegReadGPRIDX pseudos");		llvm_unreachable("unsupported size for IndirectRegReadGPRIDX pseudos");
}		}

if (VecSize <= 32) // 4 bytes		if (VecSize <= 32) // 4 bytes
return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V1);		return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V1);
if (VecSize <= 64) // 8 bytes		if (VecSize <= 64) // 8 bytes
return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V2);		return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V2);
if (VecSize <= 96) // 12 bytes		if (VecSize <= 96) // 12 bytes
return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V3);		return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V3);
if (VecSize <= 128) // 16 bytes		if (VecSize <= 128) // 16 bytes
return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V4);		return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V4);
if (VecSize <= 160) // 20 bytes		if (VecSize <= 160) // 20 bytes
return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V5);		return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V5);
if (VecSize <= 256) // 32 bytes		if (VecSize <= 256) // 32 bytes
return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V8);		return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V8);
		if (VecSize <= 288) // 36 bytes
		return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V9);
		if (VecSize <= 320) // 40 bytes
		return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V10);
		if (VecSize <= 352) // 44 bytes
		return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V11);
		if (VecSize <= 384) // 48 bytes
		return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V12);
if (VecSize <= 512) // 64 bytes		if (VecSize <= 512) // 64 bytes
return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V16);		return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V16);
if (VecSize <= 1024) // 128 bytes		if (VecSize <= 1024) // 128 bytes
return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V32);		return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V32);

llvm_unreachable("unsupported size for IndirectRegWriteGPRIDX pseudos");		llvm_unreachable("unsupported size for IndirectRegWriteGPRIDX pseudos");
}		}

static unsigned getIndirectVGPRWriteMovRelPseudoOpc(unsigned VecSize) {		static unsigned getIndirectVGPRWriteMovRelPseudoOpc(unsigned VecSize) {
if (VecSize <= 32) // 4 bytes		if (VecSize <= 32) // 4 bytes
return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V1;		return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V1;
if (VecSize <= 64) // 8 bytes		if (VecSize <= 64) // 8 bytes
return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V2;		return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V2;
if (VecSize <= 96) // 12 bytes		if (VecSize <= 96) // 12 bytes
return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V3;		return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V3;
if (VecSize <= 128) // 16 bytes		if (VecSize <= 128) // 16 bytes
return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V4;		return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V4;
if (VecSize <= 160) // 20 bytes		if (VecSize <= 160) // 20 bytes
return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V5;		return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V5;
if (VecSize <= 256) // 32 bytes		if (VecSize <= 256) // 32 bytes
return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V8;		return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V8;
		if (VecSize <= 288) // 36 bytes
		return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V9;
		if (VecSize <= 320) // 40 bytes
		return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V10;
		if (VecSize <= 352) // 44 bytes
		return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V11;
		if (VecSize <= 384) // 48 bytes
		return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V12;
if (VecSize <= 512) // 64 bytes		if (VecSize <= 512) // 64 bytes
return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V16;		return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V16;
if (VecSize <= 1024) // 128 bytes		if (VecSize <= 1024) // 128 bytes
return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V32;		return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V32;

llvm_unreachable("unsupported size for IndirectRegWrite pseudos");		llvm_unreachable("unsupported size for IndirectRegWrite pseudos");
}		}

▲ Show 20 Lines • Show All 64 Lines • ▼ Show 20 Lines	static unsigned getSGPRSpillSaveOpcode(unsigned Size) {
case 20:		case 20:
return AMDGPU::SI_SPILL_S160_SAVE;		return AMDGPU::SI_SPILL_S160_SAVE;
case 24:		case 24:
return AMDGPU::SI_SPILL_S192_SAVE;		return AMDGPU::SI_SPILL_S192_SAVE;
case 28:		case 28:
return AMDGPU::SI_SPILL_S224_SAVE;		return AMDGPU::SI_SPILL_S224_SAVE;
case 32:		case 32:
return AMDGPU::SI_SPILL_S256_SAVE;		return AMDGPU::SI_SPILL_S256_SAVE;
		case 36:
		return AMDGPU::SI_SPILL_S288_SAVE;
		case 40:
		return AMDGPU::SI_SPILL_S320_SAVE;
		case 44:
		return AMDGPU::SI_SPILL_S352_SAVE;
		case 48:
		return AMDGPU::SI_SPILL_S384_SAVE;
case 64:		case 64:
return AMDGPU::SI_SPILL_S512_SAVE;		return AMDGPU::SI_SPILL_S512_SAVE;
case 128:		case 128:
return AMDGPU::SI_SPILL_S1024_SAVE;		return AMDGPU::SI_SPILL_S1024_SAVE;
default:		default:
llvm_unreachable("unknown register size");		llvm_unreachable("unknown register size");
}		}
}		}
Show All 11 Lines	static unsigned getVGPRSpillSaveOpcode(unsigned Size) {
case 20:		case 20:
return AMDGPU::SI_SPILL_V160_SAVE;		return AMDGPU::SI_SPILL_V160_SAVE;
case 24:		case 24:
return AMDGPU::SI_SPILL_V192_SAVE;		return AMDGPU::SI_SPILL_V192_SAVE;
case 28:		case 28:
return AMDGPU::SI_SPILL_V224_SAVE;		return AMDGPU::SI_SPILL_V224_SAVE;
case 32:		case 32:
return AMDGPU::SI_SPILL_V256_SAVE;		return AMDGPU::SI_SPILL_V256_SAVE;
		case 36:
		return AMDGPU::SI_SPILL_V288_SAVE;
		case 40:
		return AMDGPU::SI_SPILL_V320_SAVE;
		case 44:
		return AMDGPU::SI_SPILL_V352_SAVE;
		case 48:
		return AMDGPU::SI_SPILL_V384_SAVE;
case 64:		case 64:
return AMDGPU::SI_SPILL_V512_SAVE;		return AMDGPU::SI_SPILL_V512_SAVE;
case 128:		case 128:
return AMDGPU::SI_SPILL_V1024_SAVE;		return AMDGPU::SI_SPILL_V1024_SAVE;
default:		default:
llvm_unreachable("unknown register size");		llvm_unreachable("unknown register size");
}		}
}		}
▲ Show 20 Lines • Show All 124 Lines • ▼ Show 20 Lines	static unsigned getSGPRSpillRestoreOpcode(unsigned Size) {
case 20:		case 20:
return AMDGPU::SI_SPILL_S160_RESTORE;		return AMDGPU::SI_SPILL_S160_RESTORE;
case 24:		case 24:
return AMDGPU::SI_SPILL_S192_RESTORE;		return AMDGPU::SI_SPILL_S192_RESTORE;
case 28:		case 28:
return AMDGPU::SI_SPILL_S224_RESTORE;		return AMDGPU::SI_SPILL_S224_RESTORE;
case 32:		case 32:
return AMDGPU::SI_SPILL_S256_RESTORE;		return AMDGPU::SI_SPILL_S256_RESTORE;
		case 36:
		return AMDGPU::SI_SPILL_S288_RESTORE;
		case 40:
		return AMDGPU::SI_SPILL_S320_RESTORE;
		case 44:
		return AMDGPU::SI_SPILL_S352_RESTORE;
		case 48:
		return AMDGPU::SI_SPILL_S384_RESTORE;
case 64:		case 64:
return AMDGPU::SI_SPILL_S512_RESTORE;		return AMDGPU::SI_SPILL_S512_RESTORE;
case 128:		case 128:
return AMDGPU::SI_SPILL_S1024_RESTORE;		return AMDGPU::SI_SPILL_S1024_RESTORE;
default:		default:
llvm_unreachable("unknown register size");		llvm_unreachable("unknown register size");
}		}
}		}
Show All 11 Lines	static unsigned getVGPRSpillRestoreOpcode(unsigned Size) {
case 20:		case 20:
return AMDGPU::SI_SPILL_V160_RESTORE;		return AMDGPU::SI_SPILL_V160_RESTORE;
case 24:		case 24:
return AMDGPU::SI_SPILL_V192_RESTORE;		return AMDGPU::SI_SPILL_V192_RESTORE;
case 28:		case 28:
return AMDGPU::SI_SPILL_V224_RESTORE;		return AMDGPU::SI_SPILL_V224_RESTORE;
case 32:		case 32:
return AMDGPU::SI_SPILL_V256_RESTORE;		return AMDGPU::SI_SPILL_V256_RESTORE;
		case 36:
		return AMDGPU::SI_SPILL_V288_RESTORE;
		case 40:
		return AMDGPU::SI_SPILL_V320_RESTORE;
		case 44:
		return AMDGPU::SI_SPILL_V352_RESTORE;
		case 48:
		return AMDGPU::SI_SPILL_V384_RESTORE;
case 64:		case 64:
return AMDGPU::SI_SPILL_V512_RESTORE;		return AMDGPU::SI_SPILL_V512_RESTORE;
case 128:		case 128:
return AMDGPU::SI_SPILL_V1024_RESTORE;		return AMDGPU::SI_SPILL_V1024_RESTORE;
default:		default:
llvm_unreachable("unknown register size");		llvm_unreachable("unknown register size");
}		}
}		}
Show All 11 Lines	static unsigned getAGPRSpillRestoreOpcode(unsigned Size) {
case 20:		case 20:
return AMDGPU::SI_SPILL_A160_RESTORE;		return AMDGPU::SI_SPILL_A160_RESTORE;
case 24:		case 24:
return AMDGPU::SI_SPILL_A192_RESTORE;		return AMDGPU::SI_SPILL_A192_RESTORE;
case 28:		case 28:
return AMDGPU::SI_SPILL_A224_RESTORE;		return AMDGPU::SI_SPILL_A224_RESTORE;
case 32:		case 32:
return AMDGPU::SI_SPILL_A256_RESTORE;		return AMDGPU::SI_SPILL_A256_RESTORE;
		case 36:
		return AMDGPU::SI_SPILL_A288_RESTORE;
		case 40:
		return AMDGPU::SI_SPILL_A320_RESTORE;
		case 44:
		return AMDGPU::SI_SPILL_A352_RESTORE;
		case 48:
		return AMDGPU::SI_SPILL_A384_RESTORE;
case 64:		case 64:
return AMDGPU::SI_SPILL_A512_RESTORE;		return AMDGPU::SI_SPILL_A512_RESTORE;
case 128:		case 128:
return AMDGPU::SI_SPILL_A1024_RESTORE;		return AMDGPU::SI_SPILL_A1024_RESTORE;
default:		default:
llvm_unreachable("unknown register size");		llvm_unreachable("unknown register size");
}		}
}		}
Show All 11 Lines	static unsigned getAVSpillRestoreOpcode(unsigned Size) {
case 20:		case 20:
return AMDGPU::SI_SPILL_AV160_RESTORE;		return AMDGPU::SI_SPILL_AV160_RESTORE;
case 24:		case 24:
return AMDGPU::SI_SPILL_AV192_RESTORE;		return AMDGPU::SI_SPILL_AV192_RESTORE;
case 28:		case 28:
return AMDGPU::SI_SPILL_AV224_RESTORE;		return AMDGPU::SI_SPILL_AV224_RESTORE;
case 32:		case 32:
return AMDGPU::SI_SPILL_AV256_RESTORE;		return AMDGPU::SI_SPILL_AV256_RESTORE;
		case 36:
		return AMDGPU::SI_SPILL_AV288_RESTORE;
		case 40:
		return AMDGPU::SI_SPILL_AV320_RESTORE;
		case 44:
		return AMDGPU::SI_SPILL_AV352_RESTORE;
		case 48:
		return AMDGPU::SI_SPILL_AV384_RESTORE;
case 64:		case 64:
return AMDGPU::SI_SPILL_AV512_RESTORE;		return AMDGPU::SI_SPILL_AV512_RESTORE;
case 128:		case 128:
return AMDGPU::SI_SPILL_AV1024_RESTORE;		return AMDGPU::SI_SPILL_AV1024_RESTORE;
default:		default:
llvm_unreachable("unknown register size");		llvm_unreachable("unknown register size");
}		}
}		}
▲ Show 20 Lines • Show All 289 Lines • ▼ Show 20 Lines	case AMDGPU::V_SET_INACTIVE_B64: {
break;		break;
}		}
case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V1:		case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V1:
case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V2:		case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V2:
case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V3:		case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V3:
case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V4:		case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V4:
case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V5:		case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V5:
case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V8:		case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V8:
		case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V9:
		case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V10:
		case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V11:
		case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V12:
case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V16:		case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V16:
case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V32:		case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V32:
case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V1:		case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V1:
case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V2:		case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V2:
case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V3:		case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V3:
case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V4:		case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V4:
case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V5:		case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V5:
case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V8:		case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V8:
Show All 35 Lines	case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V16: {
break;		break;
}		}
case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V1:		case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V1:
case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V2:		case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V2:
case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V3:		case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V3:
case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V4:		case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V4:
case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V5:		case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V5:
case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V8:		case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V8:
		case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V9:
		case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V10:
		case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V11:
		case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V12:
case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V16:		case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V16:
case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V32: {		case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V32: {
assert(ST.useVGPRIndexMode());		assert(ST.useVGPRIndexMode());
Register VecReg = MI.getOperand(0).getReg();		Register VecReg = MI.getOperand(0).getReg();
bool IsUndef = MI.getOperand(1).isUndef();		bool IsUndef = MI.getOperand(1).isUndef();
Register Idx = MI.getOperand(3).getReg();		Register Idx = MI.getOperand(3).getReg();
Register SubReg = MI.getOperand(4).getImm();		Register SubReg = MI.getOperand(4).getImm();

Show All 23 Lines	case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V32: {
break;		break;
}		}
case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V1:		case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V1:
case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V2:		case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V2:
case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V3:		case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V3:
case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V4:		case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V4:
case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V5:		case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V5:
case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V8:		case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V8:
		case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V9:
		case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V10:
		case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V11:
		case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V12:
case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V16:		case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V16:
case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V32: {		case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V32: {
assert(ST.useVGPRIndexMode());		assert(ST.useVGPRIndexMode());
Register Dst = MI.getOperand(0).getReg();		Register Dst = MI.getOperand(0).getReg();
Register VecReg = MI.getOperand(1).getReg();		Register VecReg = MI.getOperand(1).getReg();
bool IsUndef = MI.getOperand(1).isUndef();		bool IsUndef = MI.getOperand(1).isUndef();
Register Idx = MI.getOperand(2).getReg();		Register Idx = MI.getOperand(2).getReg();
Register SubReg = MI.getOperand(3).getImm();		Register SubReg = MI.getOperand(3).getImm();
▲ Show 20 Lines • Show All 2,451 Lines • ▼ Show 20 Lines	if (DimOp) {
AMDGPU::getAddrSizeMIMGOp(BaseOpcode, Dim, IsA16, ST.hasG16());		AMDGPU::getAddrSizeMIMGOp(BaseOpcode, Dim, IsA16, ST.hasG16());

unsigned VAddrWords;		unsigned VAddrWords;
if (IsNSA) {		if (IsNSA) {
VAddrWords = SRsrcIdx - VAddr0Idx;		VAddrWords = SRsrcIdx - VAddr0Idx;
} else {		} else {
const TargetRegisterClass *RC = getOpRegClass(MI, VAddr0Idx);		const TargetRegisterClass *RC = getOpRegClass(MI, VAddr0Idx);
VAddrWords = MRI.getTargetRegisterInfo()->getRegSizeInBits(*RC) / 32;		VAddrWords = MRI.getTargetRegisterInfo()->getRegSizeInBits(*RC) / 32;
if (AddrWords > 8)		if (AddrWords > 12)
AddrWords = 16;		AddrWords = 16;
}		}

if (VAddrWords != AddrWords) {		if (VAddrWords != AddrWords) {
LLVM_DEBUG(dbgs() << "bad vaddr size, expected " << AddrWords		LLVM_DEBUG(dbgs() << "bad vaddr size, expected " << AddrWords
<< " but got " << VAddrWords << "\n");		<< " but got " << VAddrWords << "\n");
ErrInfo = "bad vaddr size";		ErrInfo = "bad vaddr size";
return false;		return false;
▲ Show 20 Lines • Show All 3,973 Lines • Show Last 20 Lines

llvm/lib/Target/AMDGPU/SIInstructions.td

Show First 20 Lines • Show All 644 Lines • ▼ Show 20 Lines	class SI_INDIRECT_DST<RegisterClass rc> : VPseudoInstSI <
let Constraints = "$src = $vdst";		let Constraints = "$src = $vdst";
let usesCustomInserter = 1;		let usesCustomInserter = 1;
}		}

def SI_INDIRECT_SRC_V1 : SI_INDIRECT_SRC<VGPR_32>;		def SI_INDIRECT_SRC_V1 : SI_INDIRECT_SRC<VGPR_32>;
def SI_INDIRECT_SRC_V2 : SI_INDIRECT_SRC<VReg_64>;		def SI_INDIRECT_SRC_V2 : SI_INDIRECT_SRC<VReg_64>;
def SI_INDIRECT_SRC_V4 : SI_INDIRECT_SRC<VReg_128>;		def SI_INDIRECT_SRC_V4 : SI_INDIRECT_SRC<VReg_128>;
def SI_INDIRECT_SRC_V8 : SI_INDIRECT_SRC<VReg_256>;		def SI_INDIRECT_SRC_V8 : SI_INDIRECT_SRC<VReg_256>;
		def SI_INDIRECT_SRC_V9 : SI_INDIRECT_SRC<VReg_288>;
		def SI_INDIRECT_SRC_V10 : SI_INDIRECT_SRC<VReg_320>;
		def SI_INDIRECT_SRC_V11 : SI_INDIRECT_SRC<VReg_352>;
		def SI_INDIRECT_SRC_V12 : SI_INDIRECT_SRC<VReg_384>;
def SI_INDIRECT_SRC_V16 : SI_INDIRECT_SRC<VReg_512>;		def SI_INDIRECT_SRC_V16 : SI_INDIRECT_SRC<VReg_512>;
def SI_INDIRECT_SRC_V32 : SI_INDIRECT_SRC<VReg_1024>;		def SI_INDIRECT_SRC_V32 : SI_INDIRECT_SRC<VReg_1024>;

def SI_INDIRECT_DST_V1 : SI_INDIRECT_DST<VGPR_32>;		def SI_INDIRECT_DST_V1 : SI_INDIRECT_DST<VGPR_32>;
def SI_INDIRECT_DST_V2 : SI_INDIRECT_DST<VReg_64>;		def SI_INDIRECT_DST_V2 : SI_INDIRECT_DST<VReg_64>;
def SI_INDIRECT_DST_V4 : SI_INDIRECT_DST<VReg_128>;		def SI_INDIRECT_DST_V4 : SI_INDIRECT_DST<VReg_128>;
def SI_INDIRECT_DST_V8 : SI_INDIRECT_DST<VReg_256>;		def SI_INDIRECT_DST_V8 : SI_INDIRECT_DST<VReg_256>;
		def SI_INDIRECT_DST_V9 : SI_INDIRECT_DST<VReg_288>;
		def SI_INDIRECT_DST_V10 : SI_INDIRECT_DST<VReg_320>;
		def SI_INDIRECT_DST_V11 : SI_INDIRECT_DST<VReg_352>;
		def SI_INDIRECT_DST_V12 : SI_INDIRECT_DST<VReg_384>;
def SI_INDIRECT_DST_V16 : SI_INDIRECT_DST<VReg_512>;		def SI_INDIRECT_DST_V16 : SI_INDIRECT_DST<VReg_512>;
def SI_INDIRECT_DST_V32 : SI_INDIRECT_DST<VReg_1024>;		def SI_INDIRECT_DST_V32 : SI_INDIRECT_DST<VReg_1024>;

} // End Uses = [EXEC], Defs = [M0, EXEC]		} // End Uses = [EXEC], Defs = [M0, EXEC]

// This is a pseudo variant of the v_movreld_b32 instruction in which the		// This is a pseudo variant of the v_movreld_b32 instruction in which the
// vector operand appears only twice, once as def and once as use. Using this		// vector operand appears only twice, once as def and once as use. Using this
// pseudo avoids problems with the Two Address instructions pass.		// pseudo avoids problems with the Two Address instructions pass.
Show All 25 Lines	class S_INDIRECT_REG_WRITE_MOVREL_B64_pseudo<RegisterClass rc> :
S_INDIRECT_REG_WRITE_MOVREL_pseudo<rc, SSrc_b64>;		S_INDIRECT_REG_WRITE_MOVREL_pseudo<rc, SSrc_b64>;

def V_INDIRECT_REG_WRITE_MOVREL_B32_V1 : V_INDIRECT_REG_WRITE_MOVREL_B32_pseudo<VGPR_32>;		def V_INDIRECT_REG_WRITE_MOVREL_B32_V1 : V_INDIRECT_REG_WRITE_MOVREL_B32_pseudo<VGPR_32>;
def V_INDIRECT_REG_WRITE_MOVREL_B32_V2 : V_INDIRECT_REG_WRITE_MOVREL_B32_pseudo<VReg_64>;		def V_INDIRECT_REG_WRITE_MOVREL_B32_V2 : V_INDIRECT_REG_WRITE_MOVREL_B32_pseudo<VReg_64>;
def V_INDIRECT_REG_WRITE_MOVREL_B32_V3 : V_INDIRECT_REG_WRITE_MOVREL_B32_pseudo<VReg_96>;		def V_INDIRECT_REG_WRITE_MOVREL_B32_V3 : V_INDIRECT_REG_WRITE_MOVREL_B32_pseudo<VReg_96>;
def V_INDIRECT_REG_WRITE_MOVREL_B32_V4 : V_INDIRECT_REG_WRITE_MOVREL_B32_pseudo<VReg_128>;		def V_INDIRECT_REG_WRITE_MOVREL_B32_V4 : V_INDIRECT_REG_WRITE_MOVREL_B32_pseudo<VReg_128>;
def V_INDIRECT_REG_WRITE_MOVREL_B32_V5 : V_INDIRECT_REG_WRITE_MOVREL_B32_pseudo<VReg_160>;		def V_INDIRECT_REG_WRITE_MOVREL_B32_V5 : V_INDIRECT_REG_WRITE_MOVREL_B32_pseudo<VReg_160>;
def V_INDIRECT_REG_WRITE_MOVREL_B32_V8 : V_INDIRECT_REG_WRITE_MOVREL_B32_pseudo<VReg_256>;		def V_INDIRECT_REG_WRITE_MOVREL_B32_V8 : V_INDIRECT_REG_WRITE_MOVREL_B32_pseudo<VReg_256>;
		def V_INDIRECT_REG_WRITE_MOVREL_B32_V9 : V_INDIRECT_REG_WRITE_MOVREL_B32_pseudo<VReg_288>;
		def V_INDIRECT_REG_WRITE_MOVREL_B32_V10 : V_INDIRECT_REG_WRITE_MOVREL_B32_pseudo<VReg_320>;
		def V_INDIRECT_REG_WRITE_MOVREL_B32_V11 : V_INDIRECT_REG_WRITE_MOVREL_B32_pseudo<VReg_352>;
		def V_INDIRECT_REG_WRITE_MOVREL_B32_V12 : V_INDIRECT_REG_WRITE_MOVREL_B32_pseudo<VReg_384>;
def V_INDIRECT_REG_WRITE_MOVREL_B32_V16 : V_INDIRECT_REG_WRITE_MOVREL_B32_pseudo<VReg_512>;		def V_INDIRECT_REG_WRITE_MOVREL_B32_V16 : V_INDIRECT_REG_WRITE_MOVREL_B32_pseudo<VReg_512>;
def V_INDIRECT_REG_WRITE_MOVREL_B32_V32 : V_INDIRECT_REG_WRITE_MOVREL_B32_pseudo<VReg_1024>;		def V_INDIRECT_REG_WRITE_MOVREL_B32_V32 : V_INDIRECT_REG_WRITE_MOVREL_B32_pseudo<VReg_1024>;

def S_INDIRECT_REG_WRITE_MOVREL_B32_V1 : S_INDIRECT_REG_WRITE_MOVREL_B32_pseudo<SReg_32>;		def S_INDIRECT_REG_WRITE_MOVREL_B32_V1 : S_INDIRECT_REG_WRITE_MOVREL_B32_pseudo<SReg_32>;
def S_INDIRECT_REG_WRITE_MOVREL_B32_V2 : S_INDIRECT_REG_WRITE_MOVREL_B32_pseudo<SReg_64>;		def S_INDIRECT_REG_WRITE_MOVREL_B32_V2 : S_INDIRECT_REG_WRITE_MOVREL_B32_pseudo<SReg_64>;
def S_INDIRECT_REG_WRITE_MOVREL_B32_V3 : S_INDIRECT_REG_WRITE_MOVREL_B32_pseudo<SReg_96>;		def S_INDIRECT_REG_WRITE_MOVREL_B32_V3 : S_INDIRECT_REG_WRITE_MOVREL_B32_pseudo<SReg_96>;
def S_INDIRECT_REG_WRITE_MOVREL_B32_V4 : S_INDIRECT_REG_WRITE_MOVREL_B32_pseudo<SReg_128>;		def S_INDIRECT_REG_WRITE_MOVREL_B32_V4 : S_INDIRECT_REG_WRITE_MOVREL_B32_pseudo<SReg_128>;
def S_INDIRECT_REG_WRITE_MOVREL_B32_V5 : S_INDIRECT_REG_WRITE_MOVREL_B32_pseudo<SReg_160>;		def S_INDIRECT_REG_WRITE_MOVREL_B32_V5 : S_INDIRECT_REG_WRITE_MOVREL_B32_pseudo<SReg_160>;
Show All 21 Lines
}		}

def V_INDIRECT_REG_WRITE_GPR_IDX_B32_V1 : V_INDIRECT_REG_WRITE_GPR_IDX_pseudo<VGPR_32>;		def V_INDIRECT_REG_WRITE_GPR_IDX_B32_V1 : V_INDIRECT_REG_WRITE_GPR_IDX_pseudo<VGPR_32>;
def V_INDIRECT_REG_WRITE_GPR_IDX_B32_V2 : V_INDIRECT_REG_WRITE_GPR_IDX_pseudo<VReg_64>;		def V_INDIRECT_REG_WRITE_GPR_IDX_B32_V2 : V_INDIRECT_REG_WRITE_GPR_IDX_pseudo<VReg_64>;
def V_INDIRECT_REG_WRITE_GPR_IDX_B32_V3 : V_INDIRECT_REG_WRITE_GPR_IDX_pseudo<VReg_96>;		def V_INDIRECT_REG_WRITE_GPR_IDX_B32_V3 : V_INDIRECT_REG_WRITE_GPR_IDX_pseudo<VReg_96>;
def V_INDIRECT_REG_WRITE_GPR_IDX_B32_V4 : V_INDIRECT_REG_WRITE_GPR_IDX_pseudo<VReg_128>;		def V_INDIRECT_REG_WRITE_GPR_IDX_B32_V4 : V_INDIRECT_REG_WRITE_GPR_IDX_pseudo<VReg_128>;
def V_INDIRECT_REG_WRITE_GPR_IDX_B32_V5 : V_INDIRECT_REG_WRITE_GPR_IDX_pseudo<VReg_160>;		def V_INDIRECT_REG_WRITE_GPR_IDX_B32_V5 : V_INDIRECT_REG_WRITE_GPR_IDX_pseudo<VReg_160>;
def V_INDIRECT_REG_WRITE_GPR_IDX_B32_V8 : V_INDIRECT_REG_WRITE_GPR_IDX_pseudo<VReg_256>;		def V_INDIRECT_REG_WRITE_GPR_IDX_B32_V8 : V_INDIRECT_REG_WRITE_GPR_IDX_pseudo<VReg_256>;
		def V_INDIRECT_REG_WRITE_GPR_IDX_B32_V9 : V_INDIRECT_REG_WRITE_GPR_IDX_pseudo<VReg_288>;
		def V_INDIRECT_REG_WRITE_GPR_IDX_B32_V10 : V_INDIRECT_REG_WRITE_GPR_IDX_pseudo<VReg_320>;
		def V_INDIRECT_REG_WRITE_GPR_IDX_B32_V11 : V_INDIRECT_REG_WRITE_GPR_IDX_pseudo<VReg_352>;
		def V_INDIRECT_REG_WRITE_GPR_IDX_B32_V12 : V_INDIRECT_REG_WRITE_GPR_IDX_pseudo<VReg_384>;
def V_INDIRECT_REG_WRITE_GPR_IDX_B32_V16 : V_INDIRECT_REG_WRITE_GPR_IDX_pseudo<VReg_512>;		def V_INDIRECT_REG_WRITE_GPR_IDX_B32_V16 : V_INDIRECT_REG_WRITE_GPR_IDX_pseudo<VReg_512>;
def V_INDIRECT_REG_WRITE_GPR_IDX_B32_V32 : V_INDIRECT_REG_WRITE_GPR_IDX_pseudo<VReg_1024>;		def V_INDIRECT_REG_WRITE_GPR_IDX_B32_V32 : V_INDIRECT_REG_WRITE_GPR_IDX_pseudo<VReg_1024>;

class V_INDIRECT_REG_READ_GPR_IDX_pseudo<RegisterClass rc> : PseudoInstSI <		class V_INDIRECT_REG_READ_GPR_IDX_pseudo<RegisterClass rc> : PseudoInstSI <
(outs VGPR_32:$vdst), (ins rc:$vsrc, SSrc_b32:$idx, i32imm:$subreg)> {		(outs VGPR_32:$vdst), (ins rc:$vsrc, SSrc_b32:$idx, i32imm:$subreg)> {
let VALU = 1;		let VALU = 1;
let Uses = [M0, EXEC];		let Uses = [M0, EXEC];
let Defs = [M0];		let Defs = [M0];
}		}

def V_INDIRECT_REG_READ_GPR_IDX_B32_V1 : V_INDIRECT_REG_READ_GPR_IDX_pseudo<VGPR_32>;		def V_INDIRECT_REG_READ_GPR_IDX_B32_V1 : V_INDIRECT_REG_READ_GPR_IDX_pseudo<VGPR_32>;
def V_INDIRECT_REG_READ_GPR_IDX_B32_V2 : V_INDIRECT_REG_READ_GPR_IDX_pseudo<VReg_64>;		def V_INDIRECT_REG_READ_GPR_IDX_B32_V2 : V_INDIRECT_REG_READ_GPR_IDX_pseudo<VReg_64>;
def V_INDIRECT_REG_READ_GPR_IDX_B32_V3 : V_INDIRECT_REG_READ_GPR_IDX_pseudo<VReg_96>;		def V_INDIRECT_REG_READ_GPR_IDX_B32_V3 : V_INDIRECT_REG_READ_GPR_IDX_pseudo<VReg_96>;
def V_INDIRECT_REG_READ_GPR_IDX_B32_V4 : V_INDIRECT_REG_READ_GPR_IDX_pseudo<VReg_128>;		def V_INDIRECT_REG_READ_GPR_IDX_B32_V4 : V_INDIRECT_REG_READ_GPR_IDX_pseudo<VReg_128>;
def V_INDIRECT_REG_READ_GPR_IDX_B32_V5 : V_INDIRECT_REG_READ_GPR_IDX_pseudo<VReg_160>;		def V_INDIRECT_REG_READ_GPR_IDX_B32_V5 : V_INDIRECT_REG_READ_GPR_IDX_pseudo<VReg_160>;
def V_INDIRECT_REG_READ_GPR_IDX_B32_V8 : V_INDIRECT_REG_READ_GPR_IDX_pseudo<VReg_256>;		def V_INDIRECT_REG_READ_GPR_IDX_B32_V8 : V_INDIRECT_REG_READ_GPR_IDX_pseudo<VReg_256>;
		def V_INDIRECT_REG_READ_GPR_IDX_B32_V9 : V_INDIRECT_REG_READ_GPR_IDX_pseudo<VReg_288>;
		def V_INDIRECT_REG_READ_GPR_IDX_B32_V10 : V_INDIRECT_REG_READ_GPR_IDX_pseudo<VReg_320>;
		def V_INDIRECT_REG_READ_GPR_IDX_B32_V11 : V_INDIRECT_REG_READ_GPR_IDX_pseudo<VReg_352>;
		def V_INDIRECT_REG_READ_GPR_IDX_B32_V12 : V_INDIRECT_REG_READ_GPR_IDX_pseudo<VReg_384>;
def V_INDIRECT_REG_READ_GPR_IDX_B32_V16 : V_INDIRECT_REG_READ_GPR_IDX_pseudo<VReg_512>;		def V_INDIRECT_REG_READ_GPR_IDX_B32_V16 : V_INDIRECT_REG_READ_GPR_IDX_pseudo<VReg_512>;
def V_INDIRECT_REG_READ_GPR_IDX_B32_V32 : V_INDIRECT_REG_READ_GPR_IDX_pseudo<VReg_1024>;		def V_INDIRECT_REG_READ_GPR_IDX_B32_V32 : V_INDIRECT_REG_READ_GPR_IDX_pseudo<VReg_1024>;

multiclass SI_SPILL_SGPR <RegisterClass sgpr_class> {		multiclass SI_SPILL_SGPR <RegisterClass sgpr_class> {
let UseNamedOperandTable = 1, SGPRSpill = 1, Uses = [EXEC] in {		let UseNamedOperandTable = 1, SGPRSpill = 1, Uses = [EXEC] in {
def _SAVE : PseudoInstSI <		def _SAVE : PseudoInstSI <
(outs),		(outs),
(ins sgpr_class:$data, i32imm:$addr)> {		(ins sgpr_class:$data, i32imm:$addr)> {
Show All 17 Lines
defm SI_SPILL_S32 : SI_SPILL_SGPR <SReg_32>;		defm SI_SPILL_S32 : SI_SPILL_SGPR <SReg_32>;
defm SI_SPILL_S64 : SI_SPILL_SGPR <SReg_64>;		defm SI_SPILL_S64 : SI_SPILL_SGPR <SReg_64>;
defm SI_SPILL_S96 : SI_SPILL_SGPR <SReg_96>;		defm SI_SPILL_S96 : SI_SPILL_SGPR <SReg_96>;
defm SI_SPILL_S128 : SI_SPILL_SGPR <SReg_128>;		defm SI_SPILL_S128 : SI_SPILL_SGPR <SReg_128>;
defm SI_SPILL_S160 : SI_SPILL_SGPR <SReg_160>;		defm SI_SPILL_S160 : SI_SPILL_SGPR <SReg_160>;
defm SI_SPILL_S192 : SI_SPILL_SGPR <SReg_192>;		defm SI_SPILL_S192 : SI_SPILL_SGPR <SReg_192>;
defm SI_SPILL_S224 : SI_SPILL_SGPR <SReg_224>;		defm SI_SPILL_S224 : SI_SPILL_SGPR <SReg_224>;
defm SI_SPILL_S256 : SI_SPILL_SGPR <SReg_256>;		defm SI_SPILL_S256 : SI_SPILL_SGPR <SReg_256>;
		defm SI_SPILL_S288 : SI_SPILL_SGPR <SReg_288>;
		defm SI_SPILL_S320 : SI_SPILL_SGPR <SReg_320>;
		defm SI_SPILL_S352 : SI_SPILL_SGPR <SReg_352>;
		defm SI_SPILL_S384 : SI_SPILL_SGPR <SReg_384>;
defm SI_SPILL_S512 : SI_SPILL_SGPR <SReg_512>;		defm SI_SPILL_S512 : SI_SPILL_SGPR <SReg_512>;
defm SI_SPILL_S1024 : SI_SPILL_SGPR <SReg_1024>;		defm SI_SPILL_S1024 : SI_SPILL_SGPR <SReg_1024>;

// VGPR or AGPR spill instructions. In case of AGPR spilling a temp register		// VGPR or AGPR spill instructions. In case of AGPR spilling a temp register
// needs to be used and an extra instruction to move between VGPR and AGPR.		// needs to be used and an extra instruction to move between VGPR and AGPR.
// UsesTmp adds to the total size of an expanded spill in this case.		// UsesTmp adds to the total size of an expanded spill in this case.
multiclass SI_SPILL_VGPR <RegisterClass vgpr_class, bit UsesTmp = 0> {		multiclass SI_SPILL_VGPR <RegisterClass vgpr_class, bit UsesTmp = 0> {
let UseNamedOperandTable = 1, VGPRSpill = 1,		let UseNamedOperandTable = 1, VGPRSpill = 1,
Show All 28 Lines
defm SI_SPILL_V32 : SI_SPILL_VGPR <VGPR_32>;		defm SI_SPILL_V32 : SI_SPILL_VGPR <VGPR_32>;
defm SI_SPILL_V64 : SI_SPILL_VGPR <VReg_64>;		defm SI_SPILL_V64 : SI_SPILL_VGPR <VReg_64>;
defm SI_SPILL_V96 : SI_SPILL_VGPR <VReg_96>;		defm SI_SPILL_V96 : SI_SPILL_VGPR <VReg_96>;
defm SI_SPILL_V128 : SI_SPILL_VGPR <VReg_128>;		defm SI_SPILL_V128 : SI_SPILL_VGPR <VReg_128>;
defm SI_SPILL_V160 : SI_SPILL_VGPR <VReg_160>;		defm SI_SPILL_V160 : SI_SPILL_VGPR <VReg_160>;
defm SI_SPILL_V192 : SI_SPILL_VGPR <VReg_192>;		defm SI_SPILL_V192 : SI_SPILL_VGPR <VReg_192>;
defm SI_SPILL_V224 : SI_SPILL_VGPR <VReg_224>;		defm SI_SPILL_V224 : SI_SPILL_VGPR <VReg_224>;
defm SI_SPILL_V256 : SI_SPILL_VGPR <VReg_256>;		defm SI_SPILL_V256 : SI_SPILL_VGPR <VReg_256>;
		defm SI_SPILL_V288 : SI_SPILL_VGPR <VReg_288>;
		defm SI_SPILL_V320 : SI_SPILL_VGPR <VReg_320>;
		defm SI_SPILL_V352 : SI_SPILL_VGPR <VReg_352>;
		defm SI_SPILL_V384 : SI_SPILL_VGPR <VReg_384>;
defm SI_SPILL_V512 : SI_SPILL_VGPR <VReg_512>;		defm SI_SPILL_V512 : SI_SPILL_VGPR <VReg_512>;
defm SI_SPILL_V1024 : SI_SPILL_VGPR <VReg_1024>;		defm SI_SPILL_V1024 : SI_SPILL_VGPR <VReg_1024>;

defm SI_SPILL_A32 : SI_SPILL_VGPR <AGPR_32, 1>;		defm SI_SPILL_A32 : SI_SPILL_VGPR <AGPR_32, 1>;
defm SI_SPILL_A64 : SI_SPILL_VGPR <AReg_64, 1>;		defm SI_SPILL_A64 : SI_SPILL_VGPR <AReg_64, 1>;
defm SI_SPILL_A96 : SI_SPILL_VGPR <AReg_96, 1>;		defm SI_SPILL_A96 : SI_SPILL_VGPR <AReg_96, 1>;
defm SI_SPILL_A128 : SI_SPILL_VGPR <AReg_128, 1>;		defm SI_SPILL_A128 : SI_SPILL_VGPR <AReg_128, 1>;
defm SI_SPILL_A160 : SI_SPILL_VGPR <AReg_160, 1>;		defm SI_SPILL_A160 : SI_SPILL_VGPR <AReg_160, 1>;
defm SI_SPILL_A192 : SI_SPILL_VGPR <AReg_192, 1>;		defm SI_SPILL_A192 : SI_SPILL_VGPR <AReg_192, 1>;
defm SI_SPILL_A224 : SI_SPILL_VGPR <AReg_224, 1>;		defm SI_SPILL_A224 : SI_SPILL_VGPR <AReg_224, 1>;
defm SI_SPILL_A256 : SI_SPILL_VGPR <AReg_256, 1>;		defm SI_SPILL_A256 : SI_SPILL_VGPR <AReg_256, 1>;
		defm SI_SPILL_A288 : SI_SPILL_VGPR <AReg_288, 1>;
		defm SI_SPILL_A320 : SI_SPILL_VGPR <AReg_320, 1>;
		defm SI_SPILL_A352 : SI_SPILL_VGPR <AReg_352, 1>;
		defm SI_SPILL_A384 : SI_SPILL_VGPR <AReg_384, 1>;
defm SI_SPILL_A512 : SI_SPILL_VGPR <AReg_512, 1>;		defm SI_SPILL_A512 : SI_SPILL_VGPR <AReg_512, 1>;
defm SI_SPILL_A1024 : SI_SPILL_VGPR <AReg_1024, 1>;		defm SI_SPILL_A1024 : SI_SPILL_VGPR <AReg_1024, 1>;

defm SI_SPILL_AV32 : SI_SPILL_VGPR <AV_32, 1>;		defm SI_SPILL_AV32 : SI_SPILL_VGPR <AV_32, 1>;
defm SI_SPILL_AV64 : SI_SPILL_VGPR <AV_64, 1>;		defm SI_SPILL_AV64 : SI_SPILL_VGPR <AV_64, 1>;
defm SI_SPILL_AV96 : SI_SPILL_VGPR <AV_96, 1>;		defm SI_SPILL_AV96 : SI_SPILL_VGPR <AV_96, 1>;
defm SI_SPILL_AV128 : SI_SPILL_VGPR <AV_128, 1>;		defm SI_SPILL_AV128 : SI_SPILL_VGPR <AV_128, 1>;
defm SI_SPILL_AV160 : SI_SPILL_VGPR <AV_160, 1>;		defm SI_SPILL_AV160 : SI_SPILL_VGPR <AV_160, 1>;
defm SI_SPILL_AV192 : SI_SPILL_VGPR <AV_192, 1>;		defm SI_SPILL_AV192 : SI_SPILL_VGPR <AV_192, 1>;
defm SI_SPILL_AV224 : SI_SPILL_VGPR <AV_224, 1>;		defm SI_SPILL_AV224 : SI_SPILL_VGPR <AV_224, 1>;
defm SI_SPILL_AV256 : SI_SPILL_VGPR <AV_256, 1>;		defm SI_SPILL_AV256 : SI_SPILL_VGPR <AV_256, 1>;
		defm SI_SPILL_AV288 : SI_SPILL_VGPR <AV_288, 1>;
		defm SI_SPILL_AV320 : SI_SPILL_VGPR <AV_320, 1>;
		defm SI_SPILL_AV352 : SI_SPILL_VGPR <AV_352, 1>;
		defm SI_SPILL_AV384 : SI_SPILL_VGPR <AV_384, 1>;
defm SI_SPILL_AV512 : SI_SPILL_VGPR <AV_512, 1>;		defm SI_SPILL_AV512 : SI_SPILL_VGPR <AV_512, 1>;
defm SI_SPILL_AV1024 : SI_SPILL_VGPR <AV_1024, 1>;		defm SI_SPILL_AV1024 : SI_SPILL_VGPR <AV_1024, 1>;

def SI_PC_ADD_REL_OFFSET : SPseudoInstSI <		def SI_PC_ADD_REL_OFFSET : SPseudoInstSI <
(outs SReg_64:$dst),		(outs SReg_64:$dst),
(ins si_ga:$ptr_lo, si_ga:$ptr_hi),		(ins si_ga:$ptr_lo, si_ga:$ptr_hi),
[(set SReg_64:$dst,		[(set SReg_64:$dst,
(i64 (SIpc_add_rel_offset tglobaladdr:$ptr_lo, tglobaladdr:$ptr_hi)))]> {		(i64 (SIpc_add_rel_offset tglobaladdr:$ptr_lo, tglobaladdr:$ptr_hi)))]> {
▲ Show 20 Lines • Show All 359 Lines • ▼ Show 20 Lines	foreach Index = 0-7 in {
def Extract_Element_v8f32_#Index : Extract_Element <		def Extract_Element_v8f32_#Index : Extract_Element <
f32, v8f32, Index, !cast<SubRegIndex>(sub#Index)		f32, v8f32, Index, !cast<SubRegIndex>(sub#Index)
>;		>;
def Insert_Element_v8f32_#Index : Insert_Element <		def Insert_Element_v8f32_#Index : Insert_Element <
f32, v8f32, Index, !cast<SubRegIndex>(sub#Index)		f32, v8f32, Index, !cast<SubRegIndex>(sub#Index)
>;		>;
}		}

		// Element access patterns for the 9-element 32-bit vector types.
		// For every Index in 0-8 this defines one Extract_Element and one
		// Insert_Element record for each of v9i32 and v9f32, pairing element
		// Index with the sub-register index record named sub<Index>.
		foreach Index = 0-8 in {
		def Extract_Element_v9i32_#Index : Extract_Element <
		i32, v9i32, Index, !cast<SubRegIndex>(sub#Index)
		>;
		def Insert_Element_v9i32_#Index : Insert_Element <
		i32, v9i32, Index, !cast<SubRegIndex>(sub#Index)
		>;

		def Extract_Element_v9f32_#Index : Extract_Element <
		f32, v9f32, Index, !cast<SubRegIndex>(sub#Index)
		>;
		def Insert_Element_v9f32_#Index : Insert_Element <
		f32, v9f32, Index, !cast<SubRegIndex>(sub#Index)
		>;
		}

		// Element access patterns for the 10-element 32-bit vector types.
		// For every Index in 0-9 this defines one Extract_Element and one
		// Insert_Element record for each of v10i32 and v10f32, pairing element
		// Index with the sub-register index record named sub<Index>.
		foreach Index = 0-9 in {
		def Extract_Element_v10i32_#Index : Extract_Element <
		i32, v10i32, Index, !cast<SubRegIndex>(sub#Index)
		>;
		def Insert_Element_v10i32_#Index : Insert_Element <
		i32, v10i32, Index, !cast<SubRegIndex>(sub#Index)
		>;

		def Extract_Element_v10f32_#Index : Extract_Element <
		f32, v10f32, Index, !cast<SubRegIndex>(sub#Index)
		>;
		def Insert_Element_v10f32_#Index : Insert_Element <
		f32, v10f32, Index, !cast<SubRegIndex>(sub#Index)
		>;
		}

		// Element access patterns for the 11-element 32-bit vector types.
		// For every Index in 0-10 this defines one Extract_Element and one
		// Insert_Element record for each of v11i32 and v11f32, pairing element
		// Index with the sub-register index record named sub<Index>.
		foreach Index = 0-10 in {
		def Extract_Element_v11i32_#Index : Extract_Element <
		i32, v11i32, Index, !cast<SubRegIndex>(sub#Index)
		>;
		def Insert_Element_v11i32_#Index : Insert_Element <
		i32, v11i32, Index, !cast<SubRegIndex>(sub#Index)
		>;

		def Extract_Element_v11f32_#Index : Extract_Element <
		f32, v11f32, Index, !cast<SubRegIndex>(sub#Index)
		>;
		def Insert_Element_v11f32_#Index : Insert_Element <
		f32, v11f32, Index, !cast<SubRegIndex>(sub#Index)
		>;
		}

		// Element access patterns for the 12-element 32-bit vector types.
		// For every Index in 0-11 this defines one Extract_Element and one
		// Insert_Element record for each of v12i32 and v12f32, pairing element
		// Index with the sub-register index record named sub<Index>.
		foreach Index = 0-11 in {
		def Extract_Element_v12i32_#Index : Extract_Element <
		i32, v12i32, Index, !cast<SubRegIndex>(sub#Index)
		>;
		def Insert_Element_v12i32_#Index : Insert_Element <
		i32, v12i32, Index, !cast<SubRegIndex>(sub#Index)
		>;

		def Extract_Element_v12f32_#Index : Extract_Element <
		f32, v12f32, Index, !cast<SubRegIndex>(sub#Index)
		>;
		def Insert_Element_v12f32_#Index : Insert_Element <
		f32, v12f32, Index, !cast<SubRegIndex>(sub#Index)
		>;
		}

foreach Index = 0-15 in {		foreach Index = 0-15 in {
def Extract_Element_v16i32_#Index : Extract_Element <		def Extract_Element_v16i32_#Index : Extract_Element <
i32, v16i32, Index, !cast<SubRegIndex>(sub#Index)		i32, v16i32, Index, !cast<SubRegIndex>(sub#Index)
>;		>;
def Insert_Element_v16i32_#Index : Insert_Element <		def Insert_Element_v16i32_#Index : Insert_Element <
i32, v16i32, Index, !cast<SubRegIndex>(sub#Index)		i32, v16i32, Index, !cast<SubRegIndex>(sub#Index)
>;		>;

▲ Show 20 Lines • Show All 241 Lines • ▼ Show 20 Lines
def : BitConvert <v16i16, v4i64, VReg_256>;		def : BitConvert <v16i16, v4i64, VReg_256>;
def : BitConvert <v16f16, v4f64, VReg_256>;		def : BitConvert <v16f16, v4f64, VReg_256>;
def : BitConvert <v16i16, v4f64, VReg_256>;		def : BitConvert <v16i16, v4f64, VReg_256>;
def : BitConvert <v4i64, v16f16, VReg_256>;		def : BitConvert <v4i64, v16f16, VReg_256>;
def : BitConvert <v4i64, v16i16, VReg_256>;		def : BitConvert <v4i64, v16i16, VReg_256>;
def : BitConvert <v4f64, v16f16, VReg_256>;		def : BitConvert <v4f64, v16f16, VReg_256>;
def : BitConvert <v4f64, v16i16, VReg_256>;		def : BitConvert <v4f64, v16i16, VReg_256>;

		// 288-bit bitcast
		def : BitConvert <v9i32, v9f32, SReg_288>;
		def : BitConvert <v9f32, v9i32, SReg_288>;
		def : BitConvert <v9i32, v9f32, VReg_288>;
		def : BitConvert <v9f32, v9i32, VReg_288>;

		// 320-bit bitcast
		def : BitConvert <v10i32, v10f32, SReg_320>;
		def : BitConvert <v10f32, v10i32, SReg_320>;
		def : BitConvert <v10i32, v10f32, VReg_320>;
		def : BitConvert <v10f32, v10i32, VReg_320>;

		// 352-bit bitcast
		def : BitConvert <v11i32, v11f32, SReg_352>;
		def : BitConvert <v11f32, v11i32, SReg_352>;
		def : BitConvert <v11i32, v11f32, VReg_352>;
		def : BitConvert <v11f32, v11i32, VReg_352>;

		// 384-bit bitcast
		def : BitConvert <v12i32, v12f32, SReg_384>;
		def : BitConvert <v12f32, v12i32, SReg_384>;
		def : BitConvert <v12i32, v12f32, VReg_384>;
		def : BitConvert <v12f32, v12i32, VReg_384>;

// 512-bit bitcast		// 512-bit bitcast
def : BitConvert <v16i32, v16f32, VReg_512>;		def : BitConvert <v16i32, v16f32, VReg_512>;
def : BitConvert <v16f32, v16i32, VReg_512>;		def : BitConvert <v16f32, v16i32, VReg_512>;
def : BitConvert <v8i64, v8f64, VReg_512>;		def : BitConvert <v8i64, v8f64, VReg_512>;
def : BitConvert <v8f64, v8i64, VReg_512>;		def : BitConvert <v8f64, v8i64, VReg_512>;
def : BitConvert <v8i64, v16i32, VReg_512>;		def : BitConvert <v8i64, v16i32, VReg_512>;
def : BitConvert <v8f64, v16i32, VReg_512>;		def : BitConvert <v8f64, v16i32, VReg_512>;
def : BitConvert <v16i32, v8i64, VReg_512>;		def : BitConvert <v16i32, v8i64, VReg_512>;
▲ Show 20 Lines • Show All 524 Lines • ▼ Show 20 Lines	def : GCNPat<
(insertelt vt:$src, eltvt:$val, (MOVRELOffset i32:$idx, (i32 imm:$offset))),		(insertelt vt:$src, eltvt:$val, (MOVRELOffset i32:$idx, (i32 imm:$offset))),
(!cast<Instruction>("SI_INDIRECT_DST_"#VecSize) $src, $idx, imm:$offset, $val)		(!cast<Instruction>("SI_INDIRECT_DST_"#VecSize) $src, $idx, imm:$offset, $val)
>;		>;
}		}

defm : SI_INDIRECT_Pattern <v2f32, f32, "V2">;		defm : SI_INDIRECT_Pattern <v2f32, f32, "V2">;
defm : SI_INDIRECT_Pattern <v4f32, f32, "V4">;		defm : SI_INDIRECT_Pattern <v4f32, f32, "V4">;
defm : SI_INDIRECT_Pattern <v8f32, f32, "V8">;		defm : SI_INDIRECT_Pattern <v8f32, f32, "V8">;
		defm : SI_INDIRECT_Pattern <v9f32, f32, "V9">;
		defm : SI_INDIRECT_Pattern <v10f32, f32, "V10">;
		defm : SI_INDIRECT_Pattern <v11f32, f32, "V11">;
		defm : SI_INDIRECT_Pattern <v12f32, f32, "V12">;
defm : SI_INDIRECT_Pattern <v16f32, f32, "V16">;		defm : SI_INDIRECT_Pattern <v16f32, f32, "V16">;
defm : SI_INDIRECT_Pattern <v32f32, f32, "V32">;		defm : SI_INDIRECT_Pattern <v32f32, f32, "V32">;

defm : SI_INDIRECT_Pattern <v2i32, i32, "V2">;		defm : SI_INDIRECT_Pattern <v2i32, i32, "V2">;
defm : SI_INDIRECT_Pattern <v4i32, i32, "V4">;		defm : SI_INDIRECT_Pattern <v4i32, i32, "V4">;
defm : SI_INDIRECT_Pattern <v8i32, i32, "V8">;		defm : SI_INDIRECT_Pattern <v8i32, i32, "V8">;
		defm : SI_INDIRECT_Pattern <v9i32, i32, "V9">;
		defm : SI_INDIRECT_Pattern <v10i32, i32, "V10">;
		defm : SI_INDIRECT_Pattern <v11i32, i32, "V11">;
		defm : SI_INDIRECT_Pattern <v12i32, i32, "V12">;
defm : SI_INDIRECT_Pattern <v16i32, i32, "V16">;		defm : SI_INDIRECT_Pattern <v16i32, i32, "V16">;
defm : SI_INDIRECT_Pattern <v32i32, i32, "V32">;		defm : SI_INDIRECT_Pattern <v32i32, i32, "V32">;

//===----------------------------------------------------------------------===//		//===----------------------------------------------------------------------===//
// SAD Patterns		// SAD Patterns
//===----------------------------------------------------------------------===//		//===----------------------------------------------------------------------===//

def : GCNPat <		def : GCNPat <
▲ Show 20 Lines • Show All 1,448 Lines • Show Last 20 Lines

llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp

Show First 20 Lines • Show All 2,412 Lines • ▼ Show 20 Lines	getAnyVGPRClassForBitWidth(unsigned BitWidth) {
if (BitWidth <= 160)		if (BitWidth <= 160)
return &AMDGPU::VReg_160RegClass;		return &AMDGPU::VReg_160RegClass;
if (BitWidth <= 192)		if (BitWidth <= 192)
return &AMDGPU::VReg_192RegClass;		return &AMDGPU::VReg_192RegClass;
if (BitWidth <= 224)		if (BitWidth <= 224)
return &AMDGPU::VReg_224RegClass;		return &AMDGPU::VReg_224RegClass;
if (BitWidth <= 256)		if (BitWidth <= 256)
return &AMDGPU::VReg_256RegClass;		return &AMDGPU::VReg_256RegClass;
		if (BitWidth <= 288)
		return &AMDGPU::VReg_288RegClass;
		if (BitWidth <= 320)
		return &AMDGPU::VReg_320RegClass;
		if (BitWidth <= 352)
		return &AMDGPU::VReg_352RegClass;
		if (BitWidth <= 384)
		return &AMDGPU::VReg_384RegClass;
if (BitWidth <= 512)		if (BitWidth <= 512)
return &AMDGPU::VReg_512RegClass;		return &AMDGPU::VReg_512RegClass;
if (BitWidth <= 1024)		if (BitWidth <= 1024)
return &AMDGPU::VReg_1024RegClass;		return &AMDGPU::VReg_1024RegClass;

return nullptr;		return nullptr;
}		}

/// Returns the first `VReg_<N>_Align2` register class, in ascending width
/// order, whose width N is at least \p BitWidth. The chain starts at 64, so
/// any width up to 64 selects the 64-bit class. The new-side column adds the
/// 288/320/352/384 steps between 256 and 512.
/// \returns nullptr when \p BitWidth exceeds 1024.
static const TargetRegisterClass *		static const TargetRegisterClass *
getAlignedVGPRClassForBitWidth(unsigned BitWidth) {		getAlignedVGPRClassForBitWidth(unsigned BitWidth) {
if (BitWidth <= 64)		if (BitWidth <= 64)
return &AMDGPU::VReg_64_Align2RegClass;		return &AMDGPU::VReg_64_Align2RegClass;
if (BitWidth <= 96)		if (BitWidth <= 96)
return &AMDGPU::VReg_96_Align2RegClass;		return &AMDGPU::VReg_96_Align2RegClass;
if (BitWidth <= 128)		if (BitWidth <= 128)
return &AMDGPU::VReg_128_Align2RegClass;		return &AMDGPU::VReg_128_Align2RegClass;
if (BitWidth <= 160)		if (BitWidth <= 160)
return &AMDGPU::VReg_160_Align2RegClass;		return &AMDGPU::VReg_160_Align2RegClass;
if (BitWidth <= 192)		if (BitWidth <= 192)
return &AMDGPU::VReg_192_Align2RegClass;		return &AMDGPU::VReg_192_Align2RegClass;
if (BitWidth <= 224)		if (BitWidth <= 224)
return &AMDGPU::VReg_224_Align2RegClass;		return &AMDGPU::VReg_224_Align2RegClass;
if (BitWidth <= 256)		if (BitWidth <= 256)
return &AMDGPU::VReg_256_Align2RegClass;		return &AMDGPU::VReg_256_Align2RegClass;
		if (BitWidth <= 288)
		return &AMDGPU::VReg_288_Align2RegClass;
		if (BitWidth <= 320)
		return &AMDGPU::VReg_320_Align2RegClass;
		if (BitWidth <= 352)
		return &AMDGPU::VReg_352_Align2RegClass;
		if (BitWidth <= 384)
		return &AMDGPU::VReg_384_Align2RegClass;
if (BitWidth <= 512)		if (BitWidth <= 512)
return &AMDGPU::VReg_512_Align2RegClass;		return &AMDGPU::VReg_512_Align2RegClass;
if (BitWidth <= 1024)		if (BitWidth <= 1024)
return &AMDGPU::VReg_1024_Align2RegClass;		return &AMDGPU::VReg_1024_Align2RegClass;

// No class in this chain is wide enough.
return nullptr;		return nullptr;
}		}

Show All 20 Lines	getAnyAGPRClassForBitWidth(unsigned BitWidth) {
if (BitWidth <= 160)		if (BitWidth <= 160)
return &AMDGPU::AReg_160RegClass;		return &AMDGPU::AReg_160RegClass;
if (BitWidth <= 192)		if (BitWidth <= 192)
return &AMDGPU::AReg_192RegClass;		return &AMDGPU::AReg_192RegClass;
if (BitWidth <= 224)		if (BitWidth <= 224)
return &AMDGPU::AReg_224RegClass;		return &AMDGPU::AReg_224RegClass;
if (BitWidth <= 256)		if (BitWidth <= 256)
return &AMDGPU::AReg_256RegClass;		return &AMDGPU::AReg_256RegClass;
		if (BitWidth <= 288)
		return &AMDGPU::AReg_288RegClass;
		if (BitWidth <= 320)
		return &AMDGPU::AReg_320RegClass;
		if (BitWidth <= 352)
		return &AMDGPU::AReg_352RegClass;
		if (BitWidth <= 384)
		return &AMDGPU::AReg_384RegClass;
if (BitWidth <= 512)		if (BitWidth <= 512)
return &AMDGPU::AReg_512RegClass;		return &AMDGPU::AReg_512RegClass;
if (BitWidth <= 1024)		if (BitWidth <= 1024)
return &AMDGPU::AReg_1024RegClass;		return &AMDGPU::AReg_1024RegClass;

return nullptr;		return nullptr;
}		}

/// Returns the first `AReg_<N>_Align2` register class, in ascending width
/// order, whose width N is at least \p BitWidth. The chain starts at 64, so
/// any width up to 64 selects the 64-bit class. The new-side column adds the
/// 288/320/352/384 steps between 256 and 512.
/// \returns nullptr when \p BitWidth exceeds 1024.
static const TargetRegisterClass *		static const TargetRegisterClass *
getAlignedAGPRClassForBitWidth(unsigned BitWidth) {		getAlignedAGPRClassForBitWidth(unsigned BitWidth) {
if (BitWidth <= 64)		if (BitWidth <= 64)
return &AMDGPU::AReg_64_Align2RegClass;		return &AMDGPU::AReg_64_Align2RegClass;
if (BitWidth <= 96)		if (BitWidth <= 96)
return &AMDGPU::AReg_96_Align2RegClass;		return &AMDGPU::AReg_96_Align2RegClass;
if (BitWidth <= 128)		if (BitWidth <= 128)
return &AMDGPU::AReg_128_Align2RegClass;		return &AMDGPU::AReg_128_Align2RegClass;
if (BitWidth <= 160)		if (BitWidth <= 160)
return &AMDGPU::AReg_160_Align2RegClass;		return &AMDGPU::AReg_160_Align2RegClass;
if (BitWidth <= 192)		if (BitWidth <= 192)
return &AMDGPU::AReg_192_Align2RegClass;		return &AMDGPU::AReg_192_Align2RegClass;
if (BitWidth <= 224)		if (BitWidth <= 224)
return &AMDGPU::AReg_224_Align2RegClass;		return &AMDGPU::AReg_224_Align2RegClass;
if (BitWidth <= 256)		if (BitWidth <= 256)
return &AMDGPU::AReg_256_Align2RegClass;		return &AMDGPU::AReg_256_Align2RegClass;
		if (BitWidth <= 288)
		return &AMDGPU::AReg_288_Align2RegClass;
		if (BitWidth <= 320)
		return &AMDGPU::AReg_320_Align2RegClass;
		if (BitWidth <= 352)
		return &AMDGPU::AReg_352_Align2RegClass;
		if (BitWidth <= 384)
		return &AMDGPU::AReg_384_Align2RegClass;
if (BitWidth <= 512)		if (BitWidth <= 512)
return &AMDGPU::AReg_512_Align2RegClass;		return &AMDGPU::AReg_512_Align2RegClass;
if (BitWidth <= 1024)		if (BitWidth <= 1024)
return &AMDGPU::AReg_1024_Align2RegClass;		return &AMDGPU::AReg_1024_Align2RegClass;

// No class in this chain is wide enough.
return nullptr;		return nullptr;
}		}

Show All 18 Lines	getAnyVectorSuperClassForBitWidth(unsigned BitWidth) {
if (BitWidth <= 160)		if (BitWidth <= 160)
return &AMDGPU::AV_160RegClass;		return &AMDGPU::AV_160RegClass;
if (BitWidth <= 192)		if (BitWidth <= 192)
return &AMDGPU::AV_192RegClass;		return &AMDGPU::AV_192RegClass;
if (BitWidth <= 224)		if (BitWidth <= 224)
return &AMDGPU::AV_224RegClass;		return &AMDGPU::AV_224RegClass;
if (BitWidth <= 256)		if (BitWidth <= 256)
return &AMDGPU::AV_256RegClass;		return &AMDGPU::AV_256RegClass;
		if (BitWidth <= 288)
		return &AMDGPU::AV_288RegClass;
		if (BitWidth <= 320)
		return &AMDGPU::AV_320RegClass;
		if (BitWidth <= 352)
		return &AMDGPU::AV_352RegClass;
		if (BitWidth <= 384)
		return &AMDGPU::AV_384RegClass;
if (BitWidth <= 512)		if (BitWidth <= 512)
return &AMDGPU::AV_512RegClass;		return &AMDGPU::AV_512RegClass;
if (BitWidth <= 1024)		if (BitWidth <= 1024)
return &AMDGPU::AV_1024RegClass;		return &AMDGPU::AV_1024RegClass;

return nullptr;		return nullptr;
}		}

/// Returns the first `AV_<N>_Align2` register class, in ascending width
/// order, whose width N is at least \p BitWidth. The chain starts at 64, so
/// any width up to 64 selects the 64-bit class. The new-side column adds the
/// 288/320/352/384 steps between 256 and 512.
/// \returns nullptr when \p BitWidth exceeds 1024.
static const TargetRegisterClass *		static const TargetRegisterClass *
getAlignedVectorSuperClassForBitWidth(unsigned BitWidth) {		getAlignedVectorSuperClassForBitWidth(unsigned BitWidth) {
if (BitWidth <= 64)		if (BitWidth <= 64)
return &AMDGPU::AV_64_Align2RegClass;		return &AMDGPU::AV_64_Align2RegClass;
if (BitWidth <= 96)		if (BitWidth <= 96)
return &AMDGPU::AV_96_Align2RegClass;		return &AMDGPU::AV_96_Align2RegClass;
if (BitWidth <= 128)		if (BitWidth <= 128)
return &AMDGPU::AV_128_Align2RegClass;		return &AMDGPU::AV_128_Align2RegClass;
if (BitWidth <= 160)		if (BitWidth <= 160)
return &AMDGPU::AV_160_Align2RegClass;		return &AMDGPU::AV_160_Align2RegClass;
if (BitWidth <= 192)		if (BitWidth <= 192)
return &AMDGPU::AV_192_Align2RegClass;		return &AMDGPU::AV_192_Align2RegClass;
if (BitWidth <= 224)		if (BitWidth <= 224)
return &AMDGPU::AV_224_Align2RegClass;		return &AMDGPU::AV_224_Align2RegClass;
if (BitWidth <= 256)		if (BitWidth <= 256)
return &AMDGPU::AV_256_Align2RegClass;		return &AMDGPU::AV_256_Align2RegClass;
		if (BitWidth <= 288)
		return &AMDGPU::AV_288_Align2RegClass;
		if (BitWidth <= 320)
		return &AMDGPU::AV_320_Align2RegClass;
		if (BitWidth <= 352)
		return &AMDGPU::AV_352_Align2RegClass;
		if (BitWidth <= 384)
		return &AMDGPU::AV_384_Align2RegClass;
if (BitWidth <= 512)		if (BitWidth <= 512)
return &AMDGPU::AV_512_Align2RegClass;		return &AMDGPU::AV_512_Align2RegClass;
if (BitWidth <= 1024)		if (BitWidth <= 1024)
return &AMDGPU::AV_1024_Align2RegClass;		return &AMDGPU::AV_1024_Align2RegClass;

// No class in this chain is wide enough.
return nullptr;		return nullptr;
}		}

Show All 23 Lines	SIRegisterInfo::getSGPRClassForBitWidth(unsigned BitWidth) {
if (BitWidth <= 160)		if (BitWidth <= 160)
return &AMDGPU::SGPR_160RegClass;		return &AMDGPU::SGPR_160RegClass;
if (BitWidth <= 192)		if (BitWidth <= 192)
return &AMDGPU::SGPR_192RegClass;		return &AMDGPU::SGPR_192RegClass;
if (BitWidth <= 224)		if (BitWidth <= 224)
return &AMDGPU::SGPR_224RegClass;		return &AMDGPU::SGPR_224RegClass;
if (BitWidth <= 256)		if (BitWidth <= 256)
return &AMDGPU::SGPR_256RegClass;		return &AMDGPU::SGPR_256RegClass;
		if (BitWidth <= 288)
		return &AMDGPU::SGPR_288RegClass;
		if (BitWidth <= 320)
		return &AMDGPU::SGPR_320RegClass;
		if (BitWidth <= 352)
		return &AMDGPU::SGPR_352RegClass;
		if (BitWidth <= 384)
		return &AMDGPU::SGPR_384RegClass;
if (BitWidth <= 512)		if (BitWidth <= 512)
return &AMDGPU::SGPR_512RegClass;		return &AMDGPU::SGPR_512RegClass;
if (BitWidth <= 1024)		if (BitWidth <= 1024)
return &AMDGPU::SGPR_1024RegClass;		return &AMDGPU::SGPR_1024RegClass;

return nullptr;		return nullptr;
}		}

Show All 40 Lines	static const TargetRegisterClass *const BaseClasses[] = {
&AMDGPU::SReg_224RegClass,		&AMDGPU::SReg_224RegClass,
&AMDGPU::AReg_224_Align2RegClass,		&AMDGPU::AReg_224_Align2RegClass,
&AMDGPU::AReg_224RegClass,		&AMDGPU::AReg_224RegClass,
&AMDGPU::VReg_256_Align2RegClass,		&AMDGPU::VReg_256_Align2RegClass,
&AMDGPU::VReg_256RegClass,		&AMDGPU::VReg_256RegClass,
&AMDGPU::SReg_256RegClass,		&AMDGPU::SReg_256RegClass,
&AMDGPU::AReg_256_Align2RegClass,		&AMDGPU::AReg_256_Align2RegClass,
&AMDGPU::AReg_256RegClass,		&AMDGPU::AReg_256RegClass,
		&AMDGPU::VReg_288_Align2RegClass,
		&AMDGPU::VReg_288RegClass,
		&AMDGPU::SReg_288RegClass,
		&AMDGPU::AReg_288_Align2RegClass,
		&AMDGPU::AReg_288RegClass,
		&AMDGPU::VReg_320_Align2RegClass,
		&AMDGPU::VReg_320RegClass,
		&AMDGPU::SReg_320RegClass,
		&AMDGPU::AReg_320_Align2RegClass,
		&AMDGPU::AReg_320RegClass,
		&AMDGPU::VReg_352_Align2RegClass,
		&AMDGPU::VReg_352RegClass,
		&AMDGPU::SReg_352RegClass,
		&AMDGPU::AReg_352_Align2RegClass,
		&AMDGPU::AReg_352RegClass,
		&AMDGPU::VReg_384_Align2RegClass,
		&AMDGPU::VReg_384RegClass,
		&AMDGPU::SReg_384RegClass,
		&AMDGPU::AReg_384_Align2RegClass,
		&AMDGPU::AReg_384RegClass,
&AMDGPU::VReg_512_Align2RegClass,		&AMDGPU::VReg_512_Align2RegClass,
&AMDGPU::VReg_512RegClass,		&AMDGPU::VReg_512RegClass,
&AMDGPU::SReg_512RegClass,		&AMDGPU::SReg_512RegClass,
&AMDGPU::AReg_512_Align2RegClass,		&AMDGPU::AReg_512_Align2RegClass,
&AMDGPU::AReg_512RegClass,		&AMDGPU::AReg_512RegClass,
&AMDGPU::SReg_1024RegClass,		&AMDGPU::SReg_1024RegClass,
&AMDGPU::VReg_1024_Align2RegClass,		&AMDGPU::VReg_1024_Align2RegClass,
&AMDGPU::VReg_1024RegClass,		&AMDGPU::VReg_1024RegClass,
▲ Show 20 Lines • Show All 414 Lines • Show Last 20 Lines

llvm/lib/Target/AMDGPU/SIRegisterInfo.td

Show First 20 Lines • Show All 54 Lines • ▼ Show 20 Lines
// Yields, in `ret`, the list of sub-register indices sub0..sub<size-1> for a
// tuple of `size` elements. Each supported size has its own pre-built list
// (ret2..ret8 on both sides of the diff; ret9..ret12 are added on the new
// side; ret16 and ret32 on both sides).
class getSubRegs<int size> {		class getSubRegs<int size> {
list<SubRegIndex> ret2 = [sub0, sub1];		list<SubRegIndex> ret2 = [sub0, sub1];
list<SubRegIndex> ret3 = [sub0, sub1, sub2];		list<SubRegIndex> ret3 = [sub0, sub1, sub2];
list<SubRegIndex> ret4 = [sub0, sub1, sub2, sub3];		list<SubRegIndex> ret4 = [sub0, sub1, sub2, sub3];
list<SubRegIndex> ret5 = [sub0, sub1, sub2, sub3, sub4];		list<SubRegIndex> ret5 = [sub0, sub1, sub2, sub3, sub4];
list<SubRegIndex> ret6 = [sub0, sub1, sub2, sub3, sub4, sub5];		list<SubRegIndex> ret6 = [sub0, sub1, sub2, sub3, sub4, sub5];
list<SubRegIndex> ret7 = [sub0, sub1, sub2, sub3, sub4, sub5, sub6];		list<SubRegIndex> ret7 = [sub0, sub1, sub2, sub3, sub4, sub5, sub6];
list<SubRegIndex> ret8 = [sub0, sub1, sub2, sub3, sub4, sub5, sub6, sub7];		list<SubRegIndex> ret8 = [sub0, sub1, sub2, sub3, sub4, sub5, sub6, sub7];
		list<SubRegIndex> ret9 = [sub0, sub1, sub2, sub3, sub4, sub5, sub6, sub7, sub8];
		list<SubRegIndex> ret10 = [sub0, sub1, sub2, sub3,
		sub4, sub5, sub6, sub7,
		sub8, sub9];
		list<SubRegIndex> ret11 = [sub0, sub1, sub2, sub3,
		sub4, sub5, sub6, sub7,
		sub8, sub9, sub10];
		list<SubRegIndex> ret12 = [sub0, sub1, sub2, sub3,
		sub4, sub5, sub6, sub7,
		sub8, sub9, sub10, sub11];
list<SubRegIndex> ret16 = [sub0, sub1, sub2, sub3,		list<SubRegIndex> ret16 = [sub0, sub1, sub2, sub3,
sub4, sub5, sub6, sub7,		sub4, sub5, sub6, sub7,
sub8, sub9, sub10, sub11,		sub8, sub9, sub10, sub11,
sub12, sub13, sub14, sub15];		sub12, sub13, sub14, sub15];
list<SubRegIndex> ret32 = [sub0, sub1, sub2, sub3,		list<SubRegIndex> ret32 = [sub0, sub1, sub2, sub3,
sub4, sub5, sub6, sub7,		sub4, sub5, sub6, sub7,
sub8, sub9, sub10, sub11,		sub8, sub9, sub10, sub11,
sub12, sub13, sub14, sub15,		sub12, sub13, sub14, sub15,
sub16, sub17, sub18, sub19,		sub16, sub17, sub18, sub19,
sub20, sub21, sub22, sub23,		sub20, sub21, sub22, sub23,
sub24, sub25, sub26, sub27,		sub24, sub25, sub26, sub27,
sub28, sub29, sub30, sub31];		sub28, sub29, sub30, sub31];

// Select the list matching `size` by exact comparison. NOTE: the final arm is
// unconditional, so any size without its own !eq test (e.g. 13-15, or
// anything above 16) silently receives the 32-entry list rather than an error.
list<SubRegIndex> ret = !if(!eq(size, 2), ret2,		list<SubRegIndex> ret = !if(!eq(size, 2), ret2,
!if(!eq(size, 3), ret3,		!if(!eq(size, 3), ret3,
!if(!eq(size, 4), ret4,		!if(!eq(size, 4), ret4,
!if(!eq(size, 5), ret5,		!if(!eq(size, 5), ret5,
!if(!eq(size, 6), ret6,		!if(!eq(size, 6), ret6,
!if(!eq(size, 7), ret7,		!if(!eq(size, 7), ret7,
!if(!eq(size, 8), ret8,		!if(!eq(size, 8), ret8,
		!if(!eq(size, 9), ret9,
		!if(!eq(size, 10), ret10,
		!if(!eq(size, 11), ret11,
		!if(!eq(size, 12), ret12,
!if(!eq(size, 16), ret16,		!if(!eq(size, 16), ret16,
ret32))))))));		ret32))))))))))));
}		}

// Generates list of sequential register tuple names.		// Generates list of sequential register tuple names.
// E.g. RegSeqNames<3,2,2,"s">.ret -> [ "s[0:1]", "s[2:3]" ]		// E.g. RegSeqNames<3,2,2,"s">.ret -> [ "s[0:1]", "s[2:3]" ]
class RegSeqNames<int last_reg, int stride, int size, string prefix,		class RegSeqNames<int last_reg, int stride, int size, string prefix,
int start = 0> {		int start = 0> {
int next = !add(start, stride);		int next = !add(start, stride);
int end_reg = !add(start, size, -1);		int end_reg = !add(start, size, -1);
▲ Show 20 Lines • Show All 301 Lines • ▼ Show 20 Lines
def SGPR_192Regs : SIRegisterTuples<getSubRegs<6>.ret, SGPR_32, 105, 4, 6, "s">;		def SGPR_192Regs : SIRegisterTuples<getSubRegs<6>.ret, SGPR_32, 105, 4, 6, "s">;

// SGPR 224-bit registers. No operations use these, but for symmetry with 224-bit VGPRs.		// SGPR 224-bit registers. No operations use these, but for symmetry with 224-bit VGPRs.
def SGPR_224Regs : SIRegisterTuples<getSubRegs<7>.ret, SGPR_32, 105, 4, 7, "s">;		def SGPR_224Regs : SIRegisterTuples<getSubRegs<7>.ret, SGPR_32, 105, 4, 7, "s">;

// SGPR 256-bit registers		// SGPR 256-bit registers
def SGPR_256Regs : SIRegisterTuples<getSubRegs<8>.ret, SGPR_32, 105, 4, 8, "s">;		def SGPR_256Regs : SIRegisterTuples<getSubRegs<8>.ret, SGPR_32, 105, 4, 8, "s">;

		// SGPR 288-bit registers. No operations use these, but for symmetry with 288-bit VGPRs.
		def SGPR_288Regs : SIRegisterTuples<getSubRegs<9>.ret, SGPR_32, 105, 4, 9, "s">;

		// SGPR 320-bit registers. No operations use these, but for symmetry with 320-bit VGPRs.
		def SGPR_320Regs : SIRegisterTuples<getSubRegs<10>.ret, SGPR_32, 105, 4, 10, "s">;

		// SGPR 352-bit registers. No operations use these, but for symmetry with 352-bit VGPRs.
		def SGPR_352Regs : SIRegisterTuples<getSubRegs<11>.ret, SGPR_32, 105, 4, 11, "s">;

		// SGPR 384-bit registers. No operations use these, but for symmetry with 384-bit VGPRs.
		def SGPR_384Regs : SIRegisterTuples<getSubRegs<12>.ret, SGPR_32, 105, 4, 12, "s">;

// SGPR 512-bit registers		// SGPR 512-bit registers
def SGPR_512Regs : SIRegisterTuples<getSubRegs<16>.ret, SGPR_32, 105, 4, 16, "s">;		def SGPR_512Regs : SIRegisterTuples<getSubRegs<16>.ret, SGPR_32, 105, 4, 16, "s">;

// SGPR 1024-bit registers		// SGPR 1024-bit registers
def SGPR_1024Regs : SIRegisterTuples<getSubRegs<32>.ret, SGPR_32, 105, 4, 32, "s">;		def SGPR_1024Regs : SIRegisterTuples<getSubRegs<32>.ret, SGPR_32, 105, 4, 32, "s">;

// Trap handler TMP 32-bit registers		// Trap handler TMP 32-bit registers
def TTMP_32 : SIRegisterClass<"AMDGPU", [i32, f32, v2i16, v2f16], 32,		def TTMP_32 : SIRegisterClass<"AMDGPU", [i32, f32, v2i16, v2f16], 32,
Show All 26 Lines
def TTMP_192Regs : SIRegisterTuples<getSubRegs<6>.ret, TTMP_32, 15, 4, 6, "ttmp">;		def TTMP_192Regs : SIRegisterTuples<getSubRegs<6>.ret, TTMP_32, 15, 4, 6, "ttmp">;

// Trap handler TMP 224-bit registers		// Trap handler TMP 224-bit registers
def TTMP_224Regs : SIRegisterTuples<getSubRegs<7>.ret, TTMP_32, 15, 4, 7, "ttmp">;		def TTMP_224Regs : SIRegisterTuples<getSubRegs<7>.ret, TTMP_32, 15, 4, 7, "ttmp">;

// Trap handler TMP 256-bit registers		// Trap handler TMP 256-bit registers
def TTMP_256Regs : SIRegisterTuples<getSubRegs<8>.ret, TTMP_32, 15, 4, 8, "ttmp">;		def TTMP_256Regs : SIRegisterTuples<getSubRegs<8>.ret, TTMP_32, 15, 4, 8, "ttmp">;

		// Trap handler TMP 288-bit registers
		def TTMP_288Regs : SIRegisterTuples<getSubRegs<9>.ret, TTMP_32, 15, 4, 9, "ttmp">;

		// Trap handler TMP 320-bit registers
		def TTMP_320Regs : SIRegisterTuples<getSubRegs<10>.ret, TTMP_32, 15, 4, 10, "ttmp">;

		// Trap handler TMP 352-bit registers
		def TTMP_352Regs : SIRegisterTuples<getSubRegs<11>.ret, TTMP_32, 15, 4, 11, "ttmp">;

		// Trap handler TMP 384-bit registers
		def TTMP_384Regs : SIRegisterTuples<getSubRegs<12>.ret, TTMP_32, 15, 4, 12, "ttmp">;

// Trap handler TMP 512-bit registers		// Trap handler TMP 512-bit registers
def TTMP_512Regs : SIRegisterTuples<getSubRegs<16>.ret, TTMP_32, 15, 4, 16, "ttmp">;		def TTMP_512Regs : SIRegisterTuples<getSubRegs<16>.ret, TTMP_32, 15, 4, 16, "ttmp">;

class TmpRegTuplesBase<int index, int size,		class TmpRegTuplesBase<int index, int size,
list<Register> subRegs,		list<Register> subRegs,
list<SubRegIndex> indices = getSubRegs<size>.ret,		list<SubRegIndex> indices = getSubRegs<size>.ret,
int index1 = !add(index, size, -1),		int index1 = !add(index, size, -1),
string name = "ttmp["#index#":"#index1#"]"> :		string name = "ttmp["#index#":"#index1#"]"> :
▲ Show 20 Lines • Show All 128 Lines • ▼ Show 20 Lines
def VGPR_192 : SIRegisterTuples<getSubRegs<6>.ret, VGPR_32, 255, 1, 6, "v">;		def VGPR_192 : SIRegisterTuples<getSubRegs<6>.ret, VGPR_32, 255, 1, 6, "v">;

// VGPR 224-bit registers		// VGPR 224-bit registers
def VGPR_224 : SIRegisterTuples<getSubRegs<7>.ret, VGPR_32, 255, 1, 7, "v">;		def VGPR_224 : SIRegisterTuples<getSubRegs<7>.ret, VGPR_32, 255, 1, 7, "v">;

// VGPR 256-bit registers		// VGPR 256-bit registers
def VGPR_256 : SIRegisterTuples<getSubRegs<8>.ret, VGPR_32, 255, 1, 8, "v">;		def VGPR_256 : SIRegisterTuples<getSubRegs<8>.ret, VGPR_32, 255, 1, 8, "v">;

		// VGPR 288-bit registers
		def VGPR_288 : SIRegisterTuples<getSubRegs<9>.ret, VGPR_32, 255, 1, 9, "v">;

		// VGPR 320-bit registers
		def VGPR_320 : SIRegisterTuples<getSubRegs<10>.ret, VGPR_32, 255, 1, 10, "v">;

		// VGPR 352-bit registers
		def VGPR_352 : SIRegisterTuples<getSubRegs<11>.ret, VGPR_32, 255, 1, 11, "v">;

		// VGPR 384-bit registers
		def VGPR_384 : SIRegisterTuples<getSubRegs<12>.ret, VGPR_32, 255, 1, 12, "v">;

// VGPR 512-bit registers		// VGPR 512-bit registers
def VGPR_512 : SIRegisterTuples<getSubRegs<16>.ret, VGPR_32, 255, 1, 16, "v">;		def VGPR_512 : SIRegisterTuples<getSubRegs<16>.ret, VGPR_32, 255, 1, 16, "v">;

// VGPR 1024-bit registers		// VGPR 1024-bit registers
def VGPR_1024 : SIRegisterTuples<getSubRegs<32>.ret, VGPR_32, 255, 1, 32, "v">;		def VGPR_1024 : SIRegisterTuples<getSubRegs<32>.ret, VGPR_32, 255, 1, 32, "v">;

let HasAGPR = 1 in {		let HasAGPR = 1 in {
def AGPR_LO16 : SIRegisterClass<"AMDGPU", Reg16Types.types, 16,		def AGPR_LO16 : SIRegisterClass<"AMDGPU", Reg16Types.types, 16,
Show All 28 Lines
def AGPR_192 : SIRegisterTuples<getSubRegs<6>.ret, AGPR_32, 255, 1, 6, "a">;		def AGPR_192 : SIRegisterTuples<getSubRegs<6>.ret, AGPR_32, 255, 1, 6, "a">;

// AGPR 224-bit registers		// AGPR 224-bit registers
def AGPR_224 : SIRegisterTuples<getSubRegs<7>.ret, AGPR_32, 255, 1, 7, "a">;		def AGPR_224 : SIRegisterTuples<getSubRegs<7>.ret, AGPR_32, 255, 1, 7, "a">;

// AGPR 256-bit registers		// AGPR 256-bit registers
def AGPR_256 : SIRegisterTuples<getSubRegs<8>.ret, AGPR_32, 255, 1, 8, "a">;		def AGPR_256 : SIRegisterTuples<getSubRegs<8>.ret, AGPR_32, 255, 1, 8, "a">;

		// AGPR 288-bit registers
		def AGPR_288 : SIRegisterTuples<getSubRegs<9>.ret, AGPR_32, 255, 1, 9, "a">;

		// AGPR 320-bit registers
		def AGPR_320 : SIRegisterTuples<getSubRegs<10>.ret, AGPR_32, 255, 1, 10, "a">;

		// AGPR 352-bit registers
		def AGPR_352 : SIRegisterTuples<getSubRegs<11>.ret, AGPR_32, 255, 1, 11, "a">;

		// AGPR 384-bit registers
		def AGPR_384 : SIRegisterTuples<getSubRegs<12>.ret, AGPR_32, 255, 1, 12, "a">;

// AGPR 512-bit registers		// AGPR 512-bit registers
def AGPR_512 : SIRegisterTuples<getSubRegs<16>.ret, AGPR_32, 255, 1, 16, "a">;		def AGPR_512 : SIRegisterTuples<getSubRegs<16>.ret, AGPR_32, 255, 1, 16, "a">;

// AGPR 1024-bit registers		// AGPR 1024-bit registers
def AGPR_1024 : SIRegisterTuples<getSubRegs<32>.ret, AGPR_32, 255, 1, 32, "a">;		def AGPR_1024 : SIRegisterTuples<getSubRegs<32>.ret, AGPR_32, 255, 1, 32, "a">;

//===----------------------------------------------------------------------===//		//===----------------------------------------------------------------------===//
// Register classes used as source and destination		// Register classes used as source and destination
▲ Show 20 Lines • Show All 156 Lines • ▼ Show 20 Lines
}		}

defm "" : SRegClass<3, [v3i32, v3f32], SGPR_96Regs, TTMP_96Regs>;		defm "" : SRegClass<3, [v3i32, v3f32], SGPR_96Regs, TTMP_96Regs>;
defm "" : SRegClass<4, [v4i32, v4f32, v2i64, v2f64, v8i16, v8f16], SGPR_128Regs, TTMP_128Regs>;		defm "" : SRegClass<4, [v4i32, v4f32, v2i64, v2f64, v8i16, v8f16], SGPR_128Regs, TTMP_128Regs>;
defm "" : SRegClass<5, [v5i32, v5f32], SGPR_160Regs, TTMP_160Regs>;		defm "" : SRegClass<5, [v5i32, v5f32], SGPR_160Regs, TTMP_160Regs>;
defm "" : SRegClass<6, [v6i32, v6f32, v3i64, v3f64], SGPR_192Regs, TTMP_192Regs>;		defm "" : SRegClass<6, [v6i32, v6f32, v3i64, v3f64], SGPR_192Regs, TTMP_192Regs>;
defm "" : SRegClass<7, [v7i32, v7f32], SGPR_224Regs, TTMP_224Regs>;		defm "" : SRegClass<7, [v7i32, v7f32], SGPR_224Regs, TTMP_224Regs>;
defm "" : SRegClass<8, [v8i32, v8f32, v4i64, v4f64, v16i16, v16f16], SGPR_256Regs, TTMP_256Regs>;		defm "" : SRegClass<8, [v8i32, v8f32, v4i64, v4f64, v16i16, v16f16], SGPR_256Regs, TTMP_256Regs>;
		defm "" : SRegClass<9, [v9i32, v9f32], SGPR_288Regs, TTMP_288Regs>;
		defm "" : SRegClass<10, [v10i32, v10f32], SGPR_320Regs, TTMP_320Regs>;
		defm "" : SRegClass<11, [v11i32, v11f32], SGPR_352Regs, TTMP_352Regs>;
		defm "" : SRegClass<12, [v12i32, v12f32], SGPR_384Regs, TTMP_384Regs>;

let GlobalPriority = true in {		let GlobalPriority = true in {
defm "" : SRegClass<16, [v16i32, v16f32, v8i64, v8f64], SGPR_512Regs, TTMP_512Regs>;		defm "" : SRegClass<16, [v16i32, v16f32, v8i64, v8f64], SGPR_512Regs, TTMP_512Regs>;
defm "" : SRegClass<32, [v32i32, v32f32, v16i64, v16f64], SGPR_1024Regs>;		defm "" : SRegClass<32, [v32i32, v32f32, v16i64, v16f64], SGPR_1024Regs>;
}		}

def VRegOrLds_32 : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32,		def VRegOrLds_32 : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32,
(add VGPR_32, LDS_DIRECT_CLASS)> {		(add VGPR_32, LDS_DIRECT_CLASS)> {
Show All 28 Lines	defm VReg_64 : VRegClass<2, [i64, f64, v2i32, v2f32, v4f16, v4i16, p0, p1, p4],
(add VGPR_64)>;		(add VGPR_64)>;
defm VReg_96 : VRegClass<3, [v3i32, v3f32], (add VGPR_96)>;		defm VReg_96 : VRegClass<3, [v3i32, v3f32], (add VGPR_96)>;
defm VReg_128 : VRegClass<4, [v4i32, v4f32, v2i64, v2f64, v8i16, v8f16], (add VGPR_128)>;		defm VReg_128 : VRegClass<4, [v4i32, v4f32, v2i64, v2f64, v8i16, v8f16], (add VGPR_128)>;
defm VReg_160 : VRegClass<5, [v5i32, v5f32], (add VGPR_160)>;		defm VReg_160 : VRegClass<5, [v5i32, v5f32], (add VGPR_160)>;

defm VReg_192 : VRegClass<6, [v6i32, v6f32, v3i64, v3f64], (add VGPR_192)>;		defm VReg_192 : VRegClass<6, [v6i32, v6f32, v3i64, v3f64], (add VGPR_192)>;
defm VReg_224 : VRegClass<7, [v7i32, v7f32], (add VGPR_224)>;		defm VReg_224 : VRegClass<7, [v7i32, v7f32], (add VGPR_224)>;
defm VReg_256 : VRegClass<8, [v8i32, v8f32, v4i64, v4f64, v16i16, v16f16], (add VGPR_256)>;		defm VReg_256 : VRegClass<8, [v8i32, v8f32, v4i64, v4f64, v16i16, v16f16], (add VGPR_256)>;
		defm VReg_288 : VRegClass<9, [v9i32, v9f32], (add VGPR_288)>;
		defm VReg_320 : VRegClass<10, [v10i32, v10f32], (add VGPR_320)>;
		defm VReg_352 : VRegClass<11, [v11i32, v11f32], (add VGPR_352)>;
		defm VReg_384 : VRegClass<12, [v12i32, v12f32], (add VGPR_384)>;

let GlobalPriority = true in {		let GlobalPriority = true in {
defm VReg_512 : VRegClass<16, [v16i32, v16f32, v8i64, v8f64], (add VGPR_512)>;		defm VReg_512 : VRegClass<16, [v16i32, v16f32, v8i64, v8f64], (add VGPR_512)>;
defm VReg_1024 : VRegClass<32, [v32i32, v32f32, v16i64, v16f64], (add VGPR_1024)>;		defm VReg_1024 : VRegClass<32, [v32i32, v32f32, v16i64, v16f64], (add VGPR_1024)>;
}		}

multiclass ARegClass<int numRegs, list<ValueType> regTypes, dag regList> {		multiclass ARegClass<int numRegs, list<ValueType> regTypes, dag regList> {
let CopyCost = !add(numRegs, numRegs, 1), HasAGPR = 1 in {		let CopyCost = !add(numRegs, numRegs, 1), HasAGPR = 1 in {
// Define the regular class.		// Define the regular class.
def "" : VRegClassBase<numRegs, regTypes, regList>;		def "" : VRegClassBase<numRegs, regTypes, regList>;

// Define 2-aligned variant		// Define 2-aligned variant
def _Align2 : VRegClassBase<numRegs, regTypes, (decimate regList, 2)>;		def _Align2 : VRegClassBase<numRegs, regTypes, (decimate regList, 2)>;
}		}
}		}

defm AReg_64 : ARegClass<2, [i64, f64, v2i32, v2f32, v4f16, v4i16],		defm AReg_64 : ARegClass<2, [i64, f64, v2i32, v2f32, v4f16, v4i16],
(add AGPR_64)>;		(add AGPR_64)>;
defm AReg_96 : ARegClass<3, [v3i32, v3f32], (add AGPR_96)>;		defm AReg_96 : ARegClass<3, [v3i32, v3f32], (add AGPR_96)>;
defm AReg_128 : ARegClass<4, [v4i32, v4f32, v2i64, v2f64, v8i16, v8f16], (add AGPR_128)>;		defm AReg_128 : ARegClass<4, [v4i32, v4f32, v2i64, v2f64, v8i16, v8f16], (add AGPR_128)>;
defm AReg_160 : ARegClass<5, [v5i32, v5f32], (add AGPR_160)>;		defm AReg_160 : ARegClass<5, [v5i32, v5f32], (add AGPR_160)>;
defm AReg_192 : ARegClass<6, [v6i32, v6f32, v3i64, v3f64], (add AGPR_192)>;		defm AReg_192 : ARegClass<6, [v6i32, v6f32, v3i64, v3f64], (add AGPR_192)>;
defm AReg_224 : ARegClass<7, [v7i32, v7f32], (add AGPR_224)>;		defm AReg_224 : ARegClass<7, [v7i32, v7f32], (add AGPR_224)>;
defm AReg_256 : ARegClass<8, [v8i32, v8f32, v4i64, v4f64], (add AGPR_256)>;		defm AReg_256 : ARegClass<8, [v8i32, v8f32, v4i64, v4f64], (add AGPR_256)>;
		defm AReg_288 : ARegClass<9, [v9i32, v9f32], (add AGPR_288)>;
		defm AReg_320 : ARegClass<10, [v10i32, v10f32], (add AGPR_320)>;
		defm AReg_352 : ARegClass<11, [v11i32, v11f32], (add AGPR_352)>;
		defm AReg_384 : ARegClass<12, [v12i32, v12f32], (add AGPR_384)>;

let GlobalPriority = true in {		let GlobalPriority = true in {
defm AReg_512 : ARegClass<16, [v16i32, v16f32, v8i64, v8f64], (add AGPR_512)>;		defm AReg_512 : ARegClass<16, [v16i32, v16f32, v8i64, v8f64], (add AGPR_512)>;
defm AReg_1024 : ARegClass<32, [v32i32, v32f32, v16i64, v16f64], (add AGPR_1024)>;		defm AReg_1024 : ARegClass<32, [v32i32, v32f32, v16i64, v16f64], (add AGPR_1024)>;
}		}

} // End GeneratePressureSet = 0		} // End GeneratePressureSet = 0

▲ Show 20 Lines • Show All 50 Lines • ▼ Show 20 Lines

defm AV_64 : AVRegClass<2, VReg_64.RegTypes, (add VGPR_64), (add AGPR_64)>;		defm AV_64 : AVRegClass<2, VReg_64.RegTypes, (add VGPR_64), (add AGPR_64)>;
defm AV_96 : AVRegClass<3, VReg_96.RegTypes, (add VGPR_96), (add AGPR_96)>;		defm AV_96 : AVRegClass<3, VReg_96.RegTypes, (add VGPR_96), (add AGPR_96)>;
defm AV_128 : AVRegClass<4, VReg_128.RegTypes, (add VGPR_128), (add AGPR_128)>;		defm AV_128 : AVRegClass<4, VReg_128.RegTypes, (add VGPR_128), (add AGPR_128)>;
defm AV_160 : AVRegClass<5, VReg_160.RegTypes, (add VGPR_160), (add AGPR_160)>;		defm AV_160 : AVRegClass<5, VReg_160.RegTypes, (add VGPR_160), (add AGPR_160)>;
defm AV_192 : AVRegClass<6, VReg_192.RegTypes, (add VGPR_192), (add AGPR_192)>;		defm AV_192 : AVRegClass<6, VReg_192.RegTypes, (add VGPR_192), (add AGPR_192)>;
defm AV_224 : AVRegClass<7, VReg_224.RegTypes, (add VGPR_224), (add AGPR_224)>;		defm AV_224 : AVRegClass<7, VReg_224.RegTypes, (add VGPR_224), (add AGPR_224)>;
defm AV_256 : AVRegClass<8, VReg_256.RegTypes, (add VGPR_256), (add AGPR_256)>;		defm AV_256 : AVRegClass<8, VReg_256.RegTypes, (add VGPR_256), (add AGPR_256)>;
		defm AV_288 : AVRegClass<9, VReg_288.RegTypes, (add VGPR_288), (add AGPR_288)>;
		defm AV_320 : AVRegClass<10, VReg_320.RegTypes, (add VGPR_320), (add AGPR_320)>;
		defm AV_352 : AVRegClass<11, VReg_352.RegTypes, (add VGPR_352), (add AGPR_352)>;
		defm AV_384 : AVRegClass<12, VReg_384.RegTypes, (add VGPR_384), (add AGPR_384)>;

let GlobalPriority = true in {		let GlobalPriority = true in {
defm AV_512 : AVRegClass<16, VReg_512.RegTypes, (add VGPR_512), (add AGPR_512)>;		defm AV_512 : AVRegClass<16, VReg_512.RegTypes, (add VGPR_512), (add AGPR_512)>;
defm AV_1024 : AVRegClass<32, VReg_1024.RegTypes, (add VGPR_1024), (add AGPR_1024)>;		defm AV_1024 : AVRegClass<32, VReg_1024.RegTypes, (add VGPR_1024), (add AGPR_1024)>;
}		}

//===----------------------------------------------------------------------===//		//===----------------------------------------------------------------------===//
// Register operands		// Register operands
▲ Show 20 Lines • Show All 318 Lines • Show Last 20 Lines

llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp

Show First 20 Lines • Show All 286 Lines • ▼ Show 20 Lines	void SIShrinkInstructions::shrinkMIMG(MachineInstr &MI) const {
} else if (Info->VAddrDwords == 5) {		} else if (Info->VAddrDwords == 5) {
RC = &AMDGPU::VReg_160RegClass;		RC = &AMDGPU::VReg_160RegClass;
} else if (Info->VAddrDwords == 6) {		} else if (Info->VAddrDwords == 6) {
RC = &AMDGPU::VReg_192RegClass;		RC = &AMDGPU::VReg_192RegClass;
} else if (Info->VAddrDwords == 7) {		} else if (Info->VAddrDwords == 7) {
RC = &AMDGPU::VReg_224RegClass;		RC = &AMDGPU::VReg_224RegClass;
} else if (Info->VAddrDwords == 8) {		} else if (Info->VAddrDwords == 8) {
RC = &AMDGPU::VReg_256RegClass;		RC = &AMDGPU::VReg_256RegClass;
		} else if (Info->VAddrDwords == 9) {
		RC = &AMDGPU::VReg_288RegClass;
		} else if (Info->VAddrDwords == 10) {
		RC = &AMDGPU::VReg_320RegClass;
		} else if (Info->VAddrDwords == 11) {
		RC = &AMDGPU::VReg_352RegClass;
		} else if (Info->VAddrDwords == 12) {
		RC = &AMDGPU::VReg_384RegClass;
} else {		} else {
RC = &AMDGPU::VReg_512RegClass;		RC = &AMDGPU::VReg_512RegClass;
NewAddrDwords = 16;		NewAddrDwords = 16;
}		}

unsigned VgprBase = 0;		unsigned VgprBase = 0;
unsigned NextVgpr = 0;		unsigned NextVgpr = 0;
bool IsUndef = true;		bool IsUndef = true;
▲ Show 20 Lines • Show All 706 Lines • Show Last 20 Lines

llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp

Show First 20 Lines • Show All 2,168 Lines • ▼ Show 20 Lines	unsigned getRegBitWidth(unsigned RCID) {
case AMDGPU::SReg_256RegClassID:		case AMDGPU::SReg_256RegClassID:
case AMDGPU::VReg_256RegClassID:		case AMDGPU::VReg_256RegClassID:
case AMDGPU::AReg_256RegClassID:		case AMDGPU::AReg_256RegClassID:
case AMDGPU::VReg_256_Align2RegClassID:		case AMDGPU::VReg_256_Align2RegClassID:
case AMDGPU::AReg_256_Align2RegClassID:		case AMDGPU::AReg_256_Align2RegClassID:
case AMDGPU::AV_256RegClassID:		case AMDGPU::AV_256RegClassID:
case AMDGPU::AV_256_Align2RegClassID:		case AMDGPU::AV_256_Align2RegClassID:
return 256;		return 256;
		case AMDGPU::SGPR_288RegClassID:
		case AMDGPU::SReg_288RegClassID:
		case AMDGPU::VReg_288RegClassID:
		case AMDGPU::AReg_288RegClassID:
		case AMDGPU::VReg_288_Align2RegClassID:
		case AMDGPU::AReg_288_Align2RegClassID:
		case AMDGPU::AV_288RegClassID:
		case AMDGPU::AV_288_Align2RegClassID:
		return 288;
		case AMDGPU::SGPR_320RegClassID:
		case AMDGPU::SReg_320RegClassID:
		case AMDGPU::VReg_320RegClassID:
		case AMDGPU::AReg_320RegClassID:
		case AMDGPU::VReg_320_Align2RegClassID:
		case AMDGPU::AReg_320_Align2RegClassID:
		case AMDGPU::AV_320RegClassID:
		case AMDGPU::AV_320_Align2RegClassID:
		return 320;
		case AMDGPU::SGPR_352RegClassID:
		case AMDGPU::SReg_352RegClassID:
		case AMDGPU::VReg_352RegClassID:
		case AMDGPU::AReg_352RegClassID:
		case AMDGPU::VReg_352_Align2RegClassID:
		case AMDGPU::AReg_352_Align2RegClassID:
		case AMDGPU::AV_352RegClassID:
		case AMDGPU::AV_352_Align2RegClassID:
		return 352;
		case AMDGPU::SGPR_384RegClassID:
		case AMDGPU::SReg_384RegClassID:
		case AMDGPU::VReg_384RegClassID:
		case AMDGPU::AReg_384RegClassID:
		case AMDGPU::VReg_384_Align2RegClassID:
		case AMDGPU::AReg_384_Align2RegClassID:
		case AMDGPU::AV_384RegClassID:
		case AMDGPU::AV_384_Align2RegClassID:
		return 384;
case AMDGPU::SGPR_512RegClassID:		case AMDGPU::SGPR_512RegClassID:
case AMDGPU::SReg_512RegClassID:		case AMDGPU::SReg_512RegClassID:
case AMDGPU::VReg_512RegClassID:		case AMDGPU::VReg_512RegClassID:
case AMDGPU::AReg_512RegClassID:		case AMDGPU::AReg_512RegClassID:
case AMDGPU::VReg_512_Align2RegClassID:		case AMDGPU::VReg_512_Align2RegClassID:
case AMDGPU::AReg_512_Align2RegClassID:		case AMDGPU::AReg_512_Align2RegClassID:
case AMDGPU::AV_512RegClassID:		case AMDGPU::AV_512RegClassID:
case AMDGPU::AV_512_Align2RegClassID:		case AMDGPU::AV_512_Align2RegClassID:
▲ Show 20 Lines • Show All 363 Lines • Show Last 20 Lines

llvm/test/Analysis/CostModel/AMDGPU/add-sub.ll

	Show All 9 Lines
	; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i32 = add i32 undef, undef			; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i32 = add i32 undef, undef
	; ALL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i32 = add <2 x i32> undef, undef			; ALL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i32 = add <2 x i32> undef, undef
	; ALL-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v3i32 = add <3 x i32> undef, undef			; ALL-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v3i32 = add <3 x i32> undef, undef
	; ALL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i32 = add <4 x i32> undef, undef			; ALL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i32 = add <4 x i32> undef, undef
	; ALL-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v5i32 = add <5 x i32> undef, undef			; ALL-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v5i32 = add <5 x i32> undef, undef
	; ALL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v6i32 = add <6 x i32> undef, undef			; ALL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v6i32 = add <6 x i32> undef, undef
	; ALL-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v7i32 = add <7 x i32> undef, undef			; ALL-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v7i32 = add <7 x i32> undef, undef
	; ALL-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8i32 = add <8 x i32> undef, undef			; ALL-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8i32 = add <8 x i32> undef, undef
	; ALL-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v9i32 = add <9 x i32> undef, undef			; ALL-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %v9i32 = add <9 x i32> undef, undef
	; ALL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void			; ALL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void
	;			;
	; ALL-SIZE-LABEL: 'add_i32'			; ALL-SIZE-LABEL: 'add_i32'
	; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i32 = add i32 undef, undef			; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i32 = add i32 undef, undef
	; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i32 = add <2 x i32> undef, undef			; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i32 = add <2 x i32> undef, undef
	; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v3i32 = add <3 x i32> undef, undef			; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v3i32 = add <3 x i32> undef, undef
	; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i32 = add <4 x i32> undef, undef			; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i32 = add <4 x i32> undef, undef
	; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v5i32 = add <5 x i32> undef, undef			; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v5i32 = add <5 x i32> undef, undef
	; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v6i32 = add <6 x i32> undef, undef			; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v6i32 = add <6 x i32> undef, undef
	; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v7i32 = add <7 x i32> undef, undef			; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v7i32 = add <7 x i32> undef, undef
	; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8i32 = add <8 x i32> undef, undef			; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8i32 = add <8 x i32> undef, undef
	; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v9i32 = add <9 x i32> undef, undef			; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %v9i32 = add <9 x i32> undef, undef
	; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void			; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
	;			;
	%i32 = add i32 undef, undef			%i32 = add i32 undef, undef
	%v2i32 = add <2 x i32> undef, undef			%v2i32 = add <2 x i32> undef, undef
	%v3i32 = add <3 x i32> undef, undef			%v3i32 = add <3 x i32> undef, undef
	%v4i32 = add <4 x i32> undef, undef			%v4i32 = add <4 x i32> undef, undef
	%v5i32 = add <5 x i32> undef, undef			%v5i32 = add <5 x i32> undef, undef
	%v6i32 = add <6 x i32> undef, undef			%v6i32 = add <6 x i32> undef, undef
	▲ Show 20 Lines • Show All 173 Lines • Show Last 20 Lines

llvm/test/Analysis/CostModel/AMDGPU/arith-ssat.ll

	Show First 20 Lines • Show All 44 Lines • ▼ Show 20 Lines
	; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I64 = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef)			; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I64 = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef)
	; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I64 = call <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef)			; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I64 = call <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef)
	; FAST-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V5I64 = call <5 x i64> @llvm.sadd.sat.v5i64(<5 x i64> undef, <5 x i64> undef)			; FAST-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V5I64 = call <5 x i64> @llvm.sadd.sat.v5i64(<5 x i64> undef, <5 x i64> undef)
	; FAST-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V8I64 = call <8 x i64> @llvm.sadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef)			; FAST-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V8I64 = call <8 x i64> @llvm.sadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef)
	; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.sadd.sat.i32(i32 undef, i32 undef)			; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.sadd.sat.i32(i32 undef, i32 undef)
	; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I32 = call <2 x i32> @llvm.sadd.sat.v2i32(<2 x i32> undef, <2 x i32> undef)			; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I32 = call <2 x i32> @llvm.sadd.sat.v2i32(<2 x i32> undef, <2 x i32> undef)
	; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I32 = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef)			; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I32 = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef)
	; FAST-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I32 = call <8 x i32> @llvm.sadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef)			; FAST-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I32 = call <8 x i32> @llvm.sadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef)
	; FAST-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V9I32 = call <9 x i32> @llvm.sadd.sat.v9i32(<9 x i32> undef, <9 x i32> undef)			; FAST-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %V9I32 = call <9 x i32> @llvm.sadd.sat.v9i32(<9 x i32> undef, <9 x i32> undef)
	; FAST-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V16I32 = call <16 x i32> @llvm.sadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef)			; FAST-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V16I32 = call <16 x i32> @llvm.sadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef)
	; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.sadd.sat.i16(i16 undef, i16 undef)			; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.sadd.sat.i16(i16 undef, i16 undef)
	; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I16 = call <2 x i16> @llvm.sadd.sat.v2i16(<2 x i16> undef, <2 x i16> undef)			; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I16 = call <2 x i16> @llvm.sadd.sat.v2i16(<2 x i16> undef, <2 x i16> undef)
	; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I16 = call <4 x i16> @llvm.sadd.sat.v4i16(<4 x i16> undef, <4 x i16> undef)			; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I16 = call <4 x i16> @llvm.sadd.sat.v4i16(<4 x i16> undef, <4 x i16> undef)
	; FAST-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I16 = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef)			; FAST-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I16 = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef)
	; FAST-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16I16 = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef)			; FAST-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16I16 = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef)
	; FAST-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %V17I16 = call <17 x i16> @llvm.sadd.sat.v17i16(<17 x i16> undef, <17 x i16> undef)			; FAST-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %V17I16 = call <17 x i16> @llvm.sadd.sat.v17i16(<17 x i16> undef, <17 x i16> undef)
	; FAST-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %V32I16 = call <32 x i16> @llvm.sadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef)			; FAST-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %V32I16 = call <32 x i16> @llvm.sadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef)
	Show All 12 Lines
	; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I64 = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef)			; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I64 = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef)
	; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I64 = call <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef)			; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I64 = call <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef)
	; SLOW-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V5I64 = call <5 x i64> @llvm.sadd.sat.v5i64(<5 x i64> undef, <5 x i64> undef)			; SLOW-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V5I64 = call <5 x i64> @llvm.sadd.sat.v5i64(<5 x i64> undef, <5 x i64> undef)
	; SLOW-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V8I64 = call <8 x i64> @llvm.sadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef)			; SLOW-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V8I64 = call <8 x i64> @llvm.sadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef)
	; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.sadd.sat.i32(i32 undef, i32 undef)			; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.sadd.sat.i32(i32 undef, i32 undef)
	; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I32 = call <2 x i32> @llvm.sadd.sat.v2i32(<2 x i32> undef, <2 x i32> undef)			; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I32 = call <2 x i32> @llvm.sadd.sat.v2i32(<2 x i32> undef, <2 x i32> undef)
	; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I32 = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef)			; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I32 = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef)
	; SLOW-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I32 = call <8 x i32> @llvm.sadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef)			; SLOW-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I32 = call <8 x i32> @llvm.sadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef)
	; SLOW-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V9I32 = call <9 x i32> @llvm.sadd.sat.v9i32(<9 x i32> undef, <9 x i32> undef)			; SLOW-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %V9I32 = call <9 x i32> @llvm.sadd.sat.v9i32(<9 x i32> undef, <9 x i32> undef)
	; SLOW-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V16I32 = call <16 x i32> @llvm.sadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef)			; SLOW-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V16I32 = call <16 x i32> @llvm.sadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef)
	; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.sadd.sat.i16(i16 undef, i16 undef)			; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.sadd.sat.i16(i16 undef, i16 undef)
	; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I16 = call <2 x i16> @llvm.sadd.sat.v2i16(<2 x i16> undef, <2 x i16> undef)			; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I16 = call <2 x i16> @llvm.sadd.sat.v2i16(<2 x i16> undef, <2 x i16> undef)
	; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I16 = call <4 x i16> @llvm.sadd.sat.v4i16(<4 x i16> undef, <4 x i16> undef)			; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I16 = call <4 x i16> @llvm.sadd.sat.v4i16(<4 x i16> undef, <4 x i16> undef)
	; SLOW-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I16 = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef)			; SLOW-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I16 = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef)
	; SLOW-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16I16 = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef)			; SLOW-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16I16 = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef)
	; SLOW-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %V17I16 = call <17 x i16> @llvm.sadd.sat.v17i16(<17 x i16> undef, <17 x i16> undef)			; SLOW-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %V17I16 = call <17 x i16> @llvm.sadd.sat.v17i16(<17 x i16> undef, <17 x i16> undef)
	; SLOW-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %V32I16 = call <32 x i16> @llvm.sadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef)			; SLOW-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %V32I16 = call <32 x i16> @llvm.sadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef)
	Show All 12 Lines
	; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef)			; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef)
	; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = call <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef)			; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = call <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef)
	; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V5I64 = call <5 x i64> @llvm.sadd.sat.v5i64(<5 x i64> undef, <5 x i64> undef)			; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V5I64 = call <5 x i64> @llvm.sadd.sat.v5i64(<5 x i64> undef, <5 x i64> undef)
	; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V8I64 = call <8 x i64> @llvm.sadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef)			; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V8I64 = call <8 x i64> @llvm.sadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef)
	; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call i32 @llvm.sadd.sat.i32(i32 undef, i32 undef)			; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call i32 @llvm.sadd.sat.i32(i32 undef, i32 undef)
	; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <2 x i32> @llvm.sadd.sat.v2i32(<2 x i32> undef, <2 x i32> undef)			; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <2 x i32> @llvm.sadd.sat.v2i32(<2 x i32> undef, <2 x i32> undef)
	; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef)			; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef)
	; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I32 = call <8 x i32> @llvm.sadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef)			; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I32 = call <8 x i32> @llvm.sadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef)
	; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V9I32 = call <9 x i32> @llvm.sadd.sat.v9i32(<9 x i32> undef, <9 x i32> undef)			; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V9I32 = call <9 x i32> @llvm.sadd.sat.v9i32(<9 x i32> undef, <9 x i32> undef)
	; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V16I32 = call <16 x i32> @llvm.sadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef)			; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V16I32 = call <16 x i32> @llvm.sadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef)
	; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.sadd.sat.i16(i16 undef, i16 undef)			; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.sadd.sat.i16(i16 undef, i16 undef)
	; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I16 = call <2 x i16> @llvm.sadd.sat.v2i16(<2 x i16> undef, <2 x i16> undef)			; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I16 = call <2 x i16> @llvm.sadd.sat.v2i16(<2 x i16> undef, <2 x i16> undef)
	; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I16 = call <4 x i16> @llvm.sadd.sat.v4i16(<4 x i16> undef, <4 x i16> undef)			; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I16 = call <4 x i16> @llvm.sadd.sat.v4i16(<4 x i16> undef, <4 x i16> undef)
	; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I16 = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef)			; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I16 = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef)
	; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I16 = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef)			; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I16 = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef)
	; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V17I16 = call <17 x i16> @llvm.sadd.sat.v17i16(<17 x i16> undef, <17 x i16> undef)			; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V17I16 = call <17 x i16> @llvm.sadd.sat.v17i16(<17 x i16> undef, <17 x i16> undef)
	; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V32I16 = call <32 x i16> @llvm.sadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef)			; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V32I16 = call <32 x i16> @llvm.sadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef)
	Show All 12 Lines
	; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef)			; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef)
	; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = call <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef)			; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = call <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef)
	; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V5I64 = call <5 x i64> @llvm.sadd.sat.v5i64(<5 x i64> undef, <5 x i64> undef)			; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V5I64 = call <5 x i64> @llvm.sadd.sat.v5i64(<5 x i64> undef, <5 x i64> undef)
	; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V8I64 = call <8 x i64> @llvm.sadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef)			; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V8I64 = call <8 x i64> @llvm.sadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef)
	; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call i32 @llvm.sadd.sat.i32(i32 undef, i32 undef)			; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call i32 @llvm.sadd.sat.i32(i32 undef, i32 undef)
	; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <2 x i32> @llvm.sadd.sat.v2i32(<2 x i32> undef, <2 x i32> undef)			; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <2 x i32> @llvm.sadd.sat.v2i32(<2 x i32> undef, <2 x i32> undef)
	; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef)			; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef)
	; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I32 = call <8 x i32> @llvm.sadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef)			; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I32 = call <8 x i32> @llvm.sadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef)
	; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V9I32 = call <9 x i32> @llvm.sadd.sat.v9i32(<9 x i32> undef, <9 x i32> undef)			; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V9I32 = call <9 x i32> @llvm.sadd.sat.v9i32(<9 x i32> undef, <9 x i32> undef)
	; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V16I32 = call <16 x i32> @llvm.sadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef)			; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V16I32 = call <16 x i32> @llvm.sadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef)
	; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.sadd.sat.i16(i16 undef, i16 undef)			; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.sadd.sat.i16(i16 undef, i16 undef)
	; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I16 = call <2 x i16> @llvm.sadd.sat.v2i16(<2 x i16> undef, <2 x i16> undef)			; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I16 = call <2 x i16> @llvm.sadd.sat.v2i16(<2 x i16> undef, <2 x i16> undef)
	; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I16 = call <4 x i16> @llvm.sadd.sat.v4i16(<4 x i16> undef, <4 x i16> undef)			; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I16 = call <4 x i16> @llvm.sadd.sat.v4i16(<4 x i16> undef, <4 x i16> undef)
	; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I16 = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef)			; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I16 = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef)
	; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I16 = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef)			; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I16 = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef)
	; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V17I16 = call <17 x i16> @llvm.sadd.sat.v17i16(<17 x i16> undef, <17 x i16> undef)			; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V17I16 = call <17 x i16> @llvm.sadd.sat.v17i16(<17 x i16> undef, <17 x i16> undef)
	; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V32I16 = call <32 x i16> @llvm.sadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef)			; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V32I16 = call <32 x i16> @llvm.sadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef)
	▲ Show 20 Lines • Show All 76 Lines • ▼ Show 20 Lines
	; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I64 = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> undef, <2 x i64> undef)			; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I64 = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> undef, <2 x i64> undef)
	; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I64 = call <4 x i64> @llvm.ssub.sat.v4i64(<4 x i64> undef, <4 x i64> undef)			; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I64 = call <4 x i64> @llvm.ssub.sat.v4i64(<4 x i64> undef, <4 x i64> undef)
	; FAST-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V5I64 = call <5 x i64> @llvm.ssub.sat.v5i64(<5 x i64> undef, <5 x i64> undef)			; FAST-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V5I64 = call <5 x i64> @llvm.ssub.sat.v5i64(<5 x i64> undef, <5 x i64> undef)
	; FAST-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V8I64 = call <8 x i64> @llvm.ssub.sat.v8i64(<8 x i64> undef, <8 x i64> undef)			; FAST-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V8I64 = call <8 x i64> @llvm.ssub.sat.v8i64(<8 x i64> undef, <8 x i64> undef)
	; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.ssub.sat.i32(i32 undef, i32 undef)			; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.ssub.sat.i32(i32 undef, i32 undef)
	; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I32 = call <2 x i32> @llvm.ssub.sat.v2i32(<2 x i32> undef, <2 x i32> undef)			; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I32 = call <2 x i32> @llvm.ssub.sat.v2i32(<2 x i32> undef, <2 x i32> undef)
	; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I32 = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> undef, <4 x i32> undef)			; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I32 = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> undef, <4 x i32> undef)
	; FAST-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I32 = call <8 x i32> @llvm.ssub.sat.v8i32(<8 x i32> undef, <8 x i32> undef)			; FAST-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I32 = call <8 x i32> @llvm.ssub.sat.v8i32(<8 x i32> undef, <8 x i32> undef)
	; FAST-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V9I32 = call <9 x i32> @llvm.ssub.sat.v9i32(<9 x i32> undef, <9 x i32> undef)			; FAST-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %V9I32 = call <9 x i32> @llvm.ssub.sat.v9i32(<9 x i32> undef, <9 x i32> undef)
	; FAST-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V16I32 = call <16 x i32> @llvm.ssub.sat.v16i32(<16 x i32> undef, <16 x i32> undef)			; FAST-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V16I32 = call <16 x i32> @llvm.ssub.sat.v16i32(<16 x i32> undef, <16 x i32> undef)
	; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.ssub.sat.i16(i16 undef, i16 undef)			; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.ssub.sat.i16(i16 undef, i16 undef)
	; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I16 = call <2 x i16> @llvm.ssub.sat.v2i16(<2 x i16> undef, <2 x i16> undef)			; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I16 = call <2 x i16> @llvm.ssub.sat.v2i16(<2 x i16> undef, <2 x i16> undef)
	; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I16 = call <4 x i16> @llvm.ssub.sat.v4i16(<4 x i16> undef, <4 x i16> undef)			; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I16 = call <4 x i16> @llvm.ssub.sat.v4i16(<4 x i16> undef, <4 x i16> undef)
	; FAST-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I16 = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> undef, <8 x i16> undef)			; FAST-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I16 = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> undef, <8 x i16> undef)
	; FAST-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16I16 = call <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16> undef, <16 x i16> undef)			; FAST-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16I16 = call <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16> undef, <16 x i16> undef)
	; FAST-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %V17I16 = call <17 x i16> @llvm.ssub.sat.v17i16(<17 x i16> undef, <17 x i16> undef)			; FAST-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %V17I16 = call <17 x i16> @llvm.ssub.sat.v17i16(<17 x i16> undef, <17 x i16> undef)
	; FAST-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %V32I16 = call <32 x i16> @llvm.ssub.sat.v32i16(<32 x i16> undef, <32 x i16> undef)			; FAST-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %V32I16 = call <32 x i16> @llvm.ssub.sat.v32i16(<32 x i16> undef, <32 x i16> undef)
	Show All 12 Lines
	; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I64 = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> undef, <2 x i64> undef)			; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I64 = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> undef, <2 x i64> undef)
	; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I64 = call <4 x i64> @llvm.ssub.sat.v4i64(<4 x i64> undef, <4 x i64> undef)			; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I64 = call <4 x i64> @llvm.ssub.sat.v4i64(<4 x i64> undef, <4 x i64> undef)
	; SLOW-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V5I64 = call <5 x i64> @llvm.ssub.sat.v5i64(<5 x i64> undef, <5 x i64> undef)			; SLOW-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V5I64 = call <5 x i64> @llvm.ssub.sat.v5i64(<5 x i64> undef, <5 x i64> undef)
	; SLOW-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V8I64 = call <8 x i64> @llvm.ssub.sat.v8i64(<8 x i64> undef, <8 x i64> undef)			; SLOW-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V8I64 = call <8 x i64> @llvm.ssub.sat.v8i64(<8 x i64> undef, <8 x i64> undef)
	; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.ssub.sat.i32(i32 undef, i32 undef)			; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.ssub.sat.i32(i32 undef, i32 undef)
	; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I32 = call <2 x i32> @llvm.ssub.sat.v2i32(<2 x i32> undef, <2 x i32> undef)			; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I32 = call <2 x i32> @llvm.ssub.sat.v2i32(<2 x i32> undef, <2 x i32> undef)
	; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I32 = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> undef, <4 x i32> undef)			; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I32 = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> undef, <4 x i32> undef)
	; SLOW-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I32 = call <8 x i32> @llvm.ssub.sat.v8i32(<8 x i32> undef, <8 x i32> undef)			; SLOW-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I32 = call <8 x i32> @llvm.ssub.sat.v8i32(<8 x i32> undef, <8 x i32> undef)
	; SLOW-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V9I32 = call <9 x i32> @llvm.ssub.sat.v9i32(<9 x i32> undef, <9 x i32> undef)			; SLOW-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %V9I32 = call <9 x i32> @llvm.ssub.sat.v9i32(<9 x i32> undef, <9 x i32> undef)
	; SLOW-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V16I32 = call <16 x i32> @llvm.ssub.sat.v16i32(<16 x i32> undef, <16 x i32> undef)			; SLOW-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V16I32 = call <16 x i32> @llvm.ssub.sat.v16i32(<16 x i32> undef, <16 x i32> undef)
	; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.ssub.sat.i16(i16 undef, i16 undef)			; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.ssub.sat.i16(i16 undef, i16 undef)
	; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I16 = call <2 x i16> @llvm.ssub.sat.v2i16(<2 x i16> undef, <2 x i16> undef)			; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I16 = call <2 x i16> @llvm.ssub.sat.v2i16(<2 x i16> undef, <2 x i16> undef)
	; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I16 = call <4 x i16> @llvm.ssub.sat.v4i16(<4 x i16> undef, <4 x i16> undef)			; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I16 = call <4 x i16> @llvm.ssub.sat.v4i16(<4 x i16> undef, <4 x i16> undef)
	; SLOW-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I16 = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> undef, <8 x i16> undef)			; SLOW-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I16 = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> undef, <8 x i16> undef)
	; SLOW-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16I16 = call <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16> undef, <16 x i16> undef)			; SLOW-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16I16 = call <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16> undef, <16 x i16> undef)
	; SLOW-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %V17I16 = call <17 x i16> @llvm.ssub.sat.v17i16(<17 x i16> undef, <17 x i16> undef)			; SLOW-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %V17I16 = call <17 x i16> @llvm.ssub.sat.v17i16(<17 x i16> undef, <17 x i16> undef)
	; SLOW-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %V32I16 = call <32 x i16> @llvm.ssub.sat.v32i16(<32 x i16> undef, <32 x i16> undef)			; SLOW-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %V32I16 = call <32 x i16> @llvm.ssub.sat.v32i16(<32 x i16> undef, <32 x i16> undef)
	Show All 12 Lines
	; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> undef, <2 x i64> undef)			; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> undef, <2 x i64> undef)
	; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = call <4 x i64> @llvm.ssub.sat.v4i64(<4 x i64> undef, <4 x i64> undef)			; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = call <4 x i64> @llvm.ssub.sat.v4i64(<4 x i64> undef, <4 x i64> undef)
	; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V5I64 = call <5 x i64> @llvm.ssub.sat.v5i64(<5 x i64> undef, <5 x i64> undef)			; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V5I64 = call <5 x i64> @llvm.ssub.sat.v5i64(<5 x i64> undef, <5 x i64> undef)
	; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V8I64 = call <8 x i64> @llvm.ssub.sat.v8i64(<8 x i64> undef, <8 x i64> undef)			; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V8I64 = call <8 x i64> @llvm.ssub.sat.v8i64(<8 x i64> undef, <8 x i64> undef)
	; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call i32 @llvm.ssub.sat.i32(i32 undef, i32 undef)			; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call i32 @llvm.ssub.sat.i32(i32 undef, i32 undef)
	; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <2 x i32> @llvm.ssub.sat.v2i32(<2 x i32> undef, <2 x i32> undef)			; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <2 x i32> @llvm.ssub.sat.v2i32(<2 x i32> undef, <2 x i32> undef)
	; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> undef, <4 x i32> undef)			; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> undef, <4 x i32> undef)
	; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I32 = call <8 x i32> @llvm.ssub.sat.v8i32(<8 x i32> undef, <8 x i32> undef)			; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I32 = call <8 x i32> @llvm.ssub.sat.v8i32(<8 x i32> undef, <8 x i32> undef)
	; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V9I32 = call <9 x i32> @llvm.ssub.sat.v9i32(<9 x i32> undef, <9 x i32> undef)			; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V9I32 = call <9 x i32> @llvm.ssub.sat.v9i32(<9 x i32> undef, <9 x i32> undef)
	; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V16I32 = call <16 x i32> @llvm.ssub.sat.v16i32(<16 x i32> undef, <16 x i32> undef)			; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V16I32 = call <16 x i32> @llvm.ssub.sat.v16i32(<16 x i32> undef, <16 x i32> undef)
	; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.ssub.sat.i16(i16 undef, i16 undef)			; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.ssub.sat.i16(i16 undef, i16 undef)
	; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I16 = call <2 x i16> @llvm.ssub.sat.v2i16(<2 x i16> undef, <2 x i16> undef)			; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I16 = call <2 x i16> @llvm.ssub.sat.v2i16(<2 x i16> undef, <2 x i16> undef)
	; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I16 = call <4 x i16> @llvm.ssub.sat.v4i16(<4 x i16> undef, <4 x i16> undef)			; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I16 = call <4 x i16> @llvm.ssub.sat.v4i16(<4 x i16> undef, <4 x i16> undef)
	; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I16 = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> undef, <8 x i16> undef)			; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I16 = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> undef, <8 x i16> undef)
	; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I16 = call <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16> undef, <16 x i16> undef)			; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I16 = call <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16> undef, <16 x i16> undef)
	; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V17I16 = call <17 x i16> @llvm.ssub.sat.v17i16(<17 x i16> undef, <17 x i16> undef)			; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V17I16 = call <17 x i16> @llvm.ssub.sat.v17i16(<17 x i16> undef, <17 x i16> undef)
	; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V32I16 = call <32 x i16> @llvm.ssub.sat.v32i16(<32 x i16> undef, <32 x i16> undef)			; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V32I16 = call <32 x i16> @llvm.ssub.sat.v32i16(<32 x i16> undef, <32 x i16> undef)
	Show All 12 Lines
	; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> undef, <2 x i64> undef)			; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> undef, <2 x i64> undef)
	; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = call <4 x i64> @llvm.ssub.sat.v4i64(<4 x i64> undef, <4 x i64> undef)			; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = call <4 x i64> @llvm.ssub.sat.v4i64(<4 x i64> undef, <4 x i64> undef)
	; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V5I64 = call <5 x i64> @llvm.ssub.sat.v5i64(<5 x i64> undef, <5 x i64> undef)			; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V5I64 = call <5 x i64> @llvm.ssub.sat.v5i64(<5 x i64> undef, <5 x i64> undef)
	; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V8I64 = call <8 x i64> @llvm.ssub.sat.v8i64(<8 x i64> undef, <8 x i64> undef)			; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V8I64 = call <8 x i64> @llvm.ssub.sat.v8i64(<8 x i64> undef, <8 x i64> undef)
	; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call i32 @llvm.ssub.sat.i32(i32 undef, i32 undef)			; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call i32 @llvm.ssub.sat.i32(i32 undef, i32 undef)
	; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <2 x i32> @llvm.ssub.sat.v2i32(<2 x i32> undef, <2 x i32> undef)			; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <2 x i32> @llvm.ssub.sat.v2i32(<2 x i32> undef, <2 x i32> undef)
	; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> undef, <4 x i32> undef)			; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> undef, <4 x i32> undef)
	; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I32 = call <8 x i32> @llvm.ssub.sat.v8i32(<8 x i32> undef, <8 x i32> undef)			; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I32 = call <8 x i32> @llvm.ssub.sat.v8i32(<8 x i32> undef, <8 x i32> undef)
	; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V9I32 = call <9 x i32> @llvm.ssub.sat.v9i32(<9 x i32> undef, <9 x i32> undef)			; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V9I32 = call <9 x i32> @llvm.ssub.sat.v9i32(<9 x i32> undef, <9 x i32> undef)
	; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V16I32 = call <16 x i32> @llvm.ssub.sat.v16i32(<16 x i32> undef, <16 x i32> undef)			; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V16I32 = call <16 x i32> @llvm.ssub.sat.v16i32(<16 x i32> undef, <16 x i32> undef)
	; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.ssub.sat.i16(i16 undef, i16 undef)			; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.ssub.sat.i16(i16 undef, i16 undef)
	; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I16 = call <2 x i16> @llvm.ssub.sat.v2i16(<2 x i16> undef, <2 x i16> undef)			; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I16 = call <2 x i16> @llvm.ssub.sat.v2i16(<2 x i16> undef, <2 x i16> undef)
	; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I16 = call <4 x i16> @llvm.ssub.sat.v4i16(<4 x i16> undef, <4 x i16> undef)			; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I16 = call <4 x i16> @llvm.ssub.sat.v4i16(<4 x i16> undef, <4 x i16> undef)
	; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I16 = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> undef, <8 x i16> undef)			; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I16 = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> undef, <8 x i16> undef)
	; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I16 = call <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16> undef, <16 x i16> undef)			; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I16 = call <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16> undef, <16 x i16> undef)
	; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V17I16 = call <17 x i16> @llvm.ssub.sat.v17i16(<17 x i16> undef, <17 x i16> undef)			; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V17I16 = call <17 x i16> @llvm.ssub.sat.v17i16(<17 x i16> undef, <17 x i16> undef)
	; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V32I16 = call <32 x i16> @llvm.ssub.sat.v32i16(<32 x i16> undef, <32 x i16> undef)			; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V32I16 = call <32 x i16> @llvm.ssub.sat.v32i16(<32 x i16> undef, <32 x i16> undef)
	▲ Show 20 Lines • Show All 42 Lines • Show Last 20 Lines

llvm/test/Analysis/CostModel/AMDGPU/arith-usat.ll

	Show First 20 Lines • Show All 44 Lines • ▼ Show 20 Lines
	; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I64 = call <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef)			; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I64 = call <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef)
	; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I64 = call <4 x i64> @llvm.uadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef)			; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I64 = call <4 x i64> @llvm.uadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef)
	; FAST-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V5I64 = call <5 x i64> @llvm.uadd.sat.v5i64(<5 x i64> undef, <5 x i64> undef)			; FAST-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V5I64 = call <5 x i64> @llvm.uadd.sat.v5i64(<5 x i64> undef, <5 x i64> undef)
	; FAST-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V8I64 = call <8 x i64> @llvm.uadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef)			; FAST-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V8I64 = call <8 x i64> @llvm.uadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef)
	; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.uadd.sat.i32(i32 undef, i32 undef)			; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.uadd.sat.i32(i32 undef, i32 undef)
	; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I32 = call <2 x i32> @llvm.uadd.sat.v2i32(<2 x i32> undef, <2 x i32> undef)			; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I32 = call <2 x i32> @llvm.uadd.sat.v2i32(<2 x i32> undef, <2 x i32> undef)
	; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I32 = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef)			; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I32 = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef)
	; FAST-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I32 = call <8 x i32> @llvm.uadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef)			; FAST-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I32 = call <8 x i32> @llvm.uadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef)
	; FAST-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V9I32 = call <9 x i32> @llvm.uadd.sat.v9i32(<9 x i32> undef, <9 x i32> undef)			; FAST-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %V9I32 = call <9 x i32> @llvm.uadd.sat.v9i32(<9 x i32> undef, <9 x i32> undef)
	; FAST-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V16I32 = call <16 x i32> @llvm.uadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef)			; FAST-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V16I32 = call <16 x i32> @llvm.uadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef)
	; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.uadd.sat.i16(i16 undef, i16 undef)			; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.uadd.sat.i16(i16 undef, i16 undef)
	; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I16 = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> undef, <2 x i16> undef)			; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I16 = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> undef, <2 x i16> undef)
	; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I16 = call <4 x i16> @llvm.uadd.sat.v4i16(<4 x i16> undef, <4 x i16> undef)			; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I16 = call <4 x i16> @llvm.uadd.sat.v4i16(<4 x i16> undef, <4 x i16> undef)
	; FAST-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I16 = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef)			; FAST-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I16 = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef)
	; FAST-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16I16 = call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef)			; FAST-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16I16 = call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef)
	; FAST-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %V17I16 = call <17 x i16> @llvm.uadd.sat.v17i16(<17 x i16> undef, <17 x i16> undef)			; FAST-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %V17I16 = call <17 x i16> @llvm.uadd.sat.v17i16(<17 x i16> undef, <17 x i16> undef)
	; FAST-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %V32I16 = call <32 x i16> @llvm.uadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef)			; FAST-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %V32I16 = call <32 x i16> @llvm.uadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef)
	Show All 12 Lines
	; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I64 = call <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef)			; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I64 = call <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef)
	; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I64 = call <4 x i64> @llvm.uadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef)			; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I64 = call <4 x i64> @llvm.uadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef)
	; SLOW-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V5I64 = call <5 x i64> @llvm.uadd.sat.v5i64(<5 x i64> undef, <5 x i64> undef)			; SLOW-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V5I64 = call <5 x i64> @llvm.uadd.sat.v5i64(<5 x i64> undef, <5 x i64> undef)
	; SLOW-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V8I64 = call <8 x i64> @llvm.uadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef)			; SLOW-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V8I64 = call <8 x i64> @llvm.uadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef)
	; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.uadd.sat.i32(i32 undef, i32 undef)			; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.uadd.sat.i32(i32 undef, i32 undef)
	; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I32 = call <2 x i32> @llvm.uadd.sat.v2i32(<2 x i32> undef, <2 x i32> undef)			; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I32 = call <2 x i32> @llvm.uadd.sat.v2i32(<2 x i32> undef, <2 x i32> undef)
	; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I32 = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef)			; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I32 = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef)
	; SLOW-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I32 = call <8 x i32> @llvm.uadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef)			; SLOW-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I32 = call <8 x i32> @llvm.uadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef)
	; SLOW-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V9I32 = call <9 x i32> @llvm.uadd.sat.v9i32(<9 x i32> undef, <9 x i32> undef)			; SLOW-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %V9I32 = call <9 x i32> @llvm.uadd.sat.v9i32(<9 x i32> undef, <9 x i32> undef)
	; SLOW-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V16I32 = call <16 x i32> @llvm.uadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef)			; SLOW-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V16I32 = call <16 x i32> @llvm.uadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef)
	; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.uadd.sat.i16(i16 undef, i16 undef)			; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.uadd.sat.i16(i16 undef, i16 undef)
	; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I16 = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> undef, <2 x i16> undef)			; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I16 = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> undef, <2 x i16> undef)
	; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I16 = call <4 x i16> @llvm.uadd.sat.v4i16(<4 x i16> undef, <4 x i16> undef)			; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I16 = call <4 x i16> @llvm.uadd.sat.v4i16(<4 x i16> undef, <4 x i16> undef)
	; SLOW-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I16 = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef)			; SLOW-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I16 = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef)
	; SLOW-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16I16 = call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef)			; SLOW-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16I16 = call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef)
	; SLOW-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %V17I16 = call <17 x i16> @llvm.uadd.sat.v17i16(<17 x i16> undef, <17 x i16> undef)			; SLOW-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %V17I16 = call <17 x i16> @llvm.uadd.sat.v17i16(<17 x i16> undef, <17 x i16> undef)
	; SLOW-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %V32I16 = call <32 x i16> @llvm.uadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef)			; SLOW-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %V32I16 = call <32 x i16> @llvm.uadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef)
	Show All 12 Lines
	; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef)			; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef)
	; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = call <4 x i64> @llvm.uadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef)			; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = call <4 x i64> @llvm.uadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef)
	; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V5I64 = call <5 x i64> @llvm.uadd.sat.v5i64(<5 x i64> undef, <5 x i64> undef)			; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V5I64 = call <5 x i64> @llvm.uadd.sat.v5i64(<5 x i64> undef, <5 x i64> undef)
	; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V8I64 = call <8 x i64> @llvm.uadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef)			; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V8I64 = call <8 x i64> @llvm.uadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef)
	; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call i32 @llvm.uadd.sat.i32(i32 undef, i32 undef)			; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call i32 @llvm.uadd.sat.i32(i32 undef, i32 undef)
	; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <2 x i32> @llvm.uadd.sat.v2i32(<2 x i32> undef, <2 x i32> undef)			; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <2 x i32> @llvm.uadd.sat.v2i32(<2 x i32> undef, <2 x i32> undef)
	; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef)			; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef)
	; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I32 = call <8 x i32> @llvm.uadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef)			; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I32 = call <8 x i32> @llvm.uadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef)
	; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V9I32 = call <9 x i32> @llvm.uadd.sat.v9i32(<9 x i32> undef, <9 x i32> undef)			; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V9I32 = call <9 x i32> @llvm.uadd.sat.v9i32(<9 x i32> undef, <9 x i32> undef)
	; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V16I32 = call <16 x i32> @llvm.uadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef)			; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V16I32 = call <16 x i32> @llvm.uadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef)
	; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.uadd.sat.i16(i16 undef, i16 undef)			; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.uadd.sat.i16(i16 undef, i16 undef)
	; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I16 = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> undef, <2 x i16> undef)			; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I16 = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> undef, <2 x i16> undef)
	; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I16 = call <4 x i16> @llvm.uadd.sat.v4i16(<4 x i16> undef, <4 x i16> undef)			; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I16 = call <4 x i16> @llvm.uadd.sat.v4i16(<4 x i16> undef, <4 x i16> undef)
	; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I16 = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef)			; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I16 = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef)
	; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I16 = call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef)			; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I16 = call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef)
	; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V17I16 = call <17 x i16> @llvm.uadd.sat.v17i16(<17 x i16> undef, <17 x i16> undef)			; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V17I16 = call <17 x i16> @llvm.uadd.sat.v17i16(<17 x i16> undef, <17 x i16> undef)
	; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V32I16 = call <32 x i16> @llvm.uadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef)			; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V32I16 = call <32 x i16> @llvm.uadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef)
	Show All 12 Lines
	; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef)			; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef)
	; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = call <4 x i64> @llvm.uadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef)			; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = call <4 x i64> @llvm.uadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef)
	; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V5I64 = call <5 x i64> @llvm.uadd.sat.v5i64(<5 x i64> undef, <5 x i64> undef)			; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V5I64 = call <5 x i64> @llvm.uadd.sat.v5i64(<5 x i64> undef, <5 x i64> undef)
	; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V8I64 = call <8 x i64> @llvm.uadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef)			; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V8I64 = call <8 x i64> @llvm.uadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef)
	; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call i32 @llvm.uadd.sat.i32(i32 undef, i32 undef)			; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call i32 @llvm.uadd.sat.i32(i32 undef, i32 undef)
	; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <2 x i32> @llvm.uadd.sat.v2i32(<2 x i32> undef, <2 x i32> undef)			; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <2 x i32> @llvm.uadd.sat.v2i32(<2 x i32> undef, <2 x i32> undef)
	; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef)			; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef)
	; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I32 = call <8 x i32> @llvm.uadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef)			; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I32 = call <8 x i32> @llvm.uadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef)
	; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V9I32 = call <9 x i32> @llvm.uadd.sat.v9i32(<9 x i32> undef, <9 x i32> undef)			; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V9I32 = call <9 x i32> @llvm.uadd.sat.v9i32(<9 x i32> undef, <9 x i32> undef)
	; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V16I32 = call <16 x i32> @llvm.uadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef)			; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V16I32 = call <16 x i32> @llvm.uadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef)
	; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.uadd.sat.i16(i16 undef, i16 undef)			; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.uadd.sat.i16(i16 undef, i16 undef)
	; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I16 = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> undef, <2 x i16> undef)			; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I16 = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> undef, <2 x i16> undef)
	; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I16 = call <4 x i16> @llvm.uadd.sat.v4i16(<4 x i16> undef, <4 x i16> undef)			; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I16 = call <4 x i16> @llvm.uadd.sat.v4i16(<4 x i16> undef, <4 x i16> undef)
	; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I16 = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef)			; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I16 = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef)
	; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I16 = call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef)			; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I16 = call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef)
	; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V17I16 = call <17 x i16> @llvm.uadd.sat.v17i16(<17 x i16> undef, <17 x i16> undef)			; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V17I16 = call <17 x i16> @llvm.uadd.sat.v17i16(<17 x i16> undef, <17 x i16> undef)
	; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V32I16 = call <32 x i16> @llvm.uadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef)			; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V32I16 = call <32 x i16> @llvm.uadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef)
	▲ Show 20 Lines • Show All 76 Lines • ▼ Show 20 Lines
	; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I64 = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> undef, <2 x i64> undef)			; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I64 = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> undef, <2 x i64> undef)
	; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I64 = call <4 x i64> @llvm.usub.sat.v4i64(<4 x i64> undef, <4 x i64> undef)			; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I64 = call <4 x i64> @llvm.usub.sat.v4i64(<4 x i64> undef, <4 x i64> undef)
	; FAST-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V5I64 = call <5 x i64> @llvm.usub.sat.v5i64(<5 x i64> undef, <5 x i64> undef)			; FAST-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V5I64 = call <5 x i64> @llvm.usub.sat.v5i64(<5 x i64> undef, <5 x i64> undef)
	; FAST-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V8I64 = call <8 x i64> @llvm.usub.sat.v8i64(<8 x i64> undef, <8 x i64> undef)			; FAST-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V8I64 = call <8 x i64> @llvm.usub.sat.v8i64(<8 x i64> undef, <8 x i64> undef)
	; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.usub.sat.i32(i32 undef, i32 undef)			; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.usub.sat.i32(i32 undef, i32 undef)
	; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I32 = call <2 x i32> @llvm.usub.sat.v2i32(<2 x i32> undef, <2 x i32> undef)			; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I32 = call <2 x i32> @llvm.usub.sat.v2i32(<2 x i32> undef, <2 x i32> undef)
	; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I32 = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> undef, <4 x i32> undef)			; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I32 = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> undef, <4 x i32> undef)
	; FAST-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I32 = call <8 x i32> @llvm.usub.sat.v8i32(<8 x i32> undef, <8 x i32> undef)			; FAST-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I32 = call <8 x i32> @llvm.usub.sat.v8i32(<8 x i32> undef, <8 x i32> undef)
	; FAST-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V9I32 = call <9 x i32> @llvm.usub.sat.v9i32(<9 x i32> undef, <9 x i32> undef)			; FAST-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %V9I32 = call <9 x i32> @llvm.usub.sat.v9i32(<9 x i32> undef, <9 x i32> undef)
	; FAST-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V16I32 = call <16 x i32> @llvm.usub.sat.v16i32(<16 x i32> undef, <16 x i32> undef)			; FAST-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V16I32 = call <16 x i32> @llvm.usub.sat.v16i32(<16 x i32> undef, <16 x i32> undef)
	; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.usub.sat.i16(i16 undef, i16 undef)			; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.usub.sat.i16(i16 undef, i16 undef)
	; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I16 = call <2 x i16> @llvm.usub.sat.v2i16(<2 x i16> undef, <2 x i16> undef)			; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I16 = call <2 x i16> @llvm.usub.sat.v2i16(<2 x i16> undef, <2 x i16> undef)
	; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I16 = call <4 x i16> @llvm.usub.sat.v4i16(<4 x i16> undef, <4 x i16> undef)			; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I16 = call <4 x i16> @llvm.usub.sat.v4i16(<4 x i16> undef, <4 x i16> undef)
	; FAST-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I16 = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> undef, <8 x i16> undef)			; FAST-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I16 = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> undef, <8 x i16> undef)
	; FAST-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16I16 = call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> undef, <16 x i16> undef)			; FAST-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16I16 = call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> undef, <16 x i16> undef)
	; FAST-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %V17I16 = call <17 x i16> @llvm.usub.sat.v17i16(<17 x i16> undef, <17 x i16> undef)			; FAST-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %V17I16 = call <17 x i16> @llvm.usub.sat.v17i16(<17 x i16> undef, <17 x i16> undef)
	; FAST-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %V32I16 = call <32 x i16> @llvm.usub.sat.v32i16(<32 x i16> undef, <32 x i16> undef)			; FAST-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %V32I16 = call <32 x i16> @llvm.usub.sat.v32i16(<32 x i16> undef, <32 x i16> undef)
	Show All 12 Lines
	; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I64 = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> undef, <2 x i64> undef)			; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I64 = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> undef, <2 x i64> undef)
	; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I64 = call <4 x i64> @llvm.usub.sat.v4i64(<4 x i64> undef, <4 x i64> undef)			; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I64 = call <4 x i64> @llvm.usub.sat.v4i64(<4 x i64> undef, <4 x i64> undef)
	; SLOW-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V5I64 = call <5 x i64> @llvm.usub.sat.v5i64(<5 x i64> undef, <5 x i64> undef)			; SLOW-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V5I64 = call <5 x i64> @llvm.usub.sat.v5i64(<5 x i64> undef, <5 x i64> undef)
	; SLOW-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V8I64 = call <8 x i64> @llvm.usub.sat.v8i64(<8 x i64> undef, <8 x i64> undef)			; SLOW-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V8I64 = call <8 x i64> @llvm.usub.sat.v8i64(<8 x i64> undef, <8 x i64> undef)
	; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.usub.sat.i32(i32 undef, i32 undef)			; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.usub.sat.i32(i32 undef, i32 undef)
	; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I32 = call <2 x i32> @llvm.usub.sat.v2i32(<2 x i32> undef, <2 x i32> undef)			; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I32 = call <2 x i32> @llvm.usub.sat.v2i32(<2 x i32> undef, <2 x i32> undef)
	; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I32 = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> undef, <4 x i32> undef)			; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I32 = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> undef, <4 x i32> undef)
	; SLOW-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I32 = call <8 x i32> @llvm.usub.sat.v8i32(<8 x i32> undef, <8 x i32> undef)			; SLOW-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I32 = call <8 x i32> @llvm.usub.sat.v8i32(<8 x i32> undef, <8 x i32> undef)
	; SLOW-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V9I32 = call <9 x i32> @llvm.usub.sat.v9i32(<9 x i32> undef, <9 x i32> undef)			; SLOW-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %V9I32 = call <9 x i32> @llvm.usub.sat.v9i32(<9 x i32> undef, <9 x i32> undef)
	; SLOW-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V16I32 = call <16 x i32> @llvm.usub.sat.v16i32(<16 x i32> undef, <16 x i32> undef)			; SLOW-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V16I32 = call <16 x i32> @llvm.usub.sat.v16i32(<16 x i32> undef, <16 x i32> undef)
	; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.usub.sat.i16(i16 undef, i16 undef)			; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.usub.sat.i16(i16 undef, i16 undef)
	; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I16 = call <2 x i16> @llvm.usub.sat.v2i16(<2 x i16> undef, <2 x i16> undef)			; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I16 = call <2 x i16> @llvm.usub.sat.v2i16(<2 x i16> undef, <2 x i16> undef)
	; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I16 = call <4 x i16> @llvm.usub.sat.v4i16(<4 x i16> undef, <4 x i16> undef)			; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I16 = call <4 x i16> @llvm.usub.sat.v4i16(<4 x i16> undef, <4 x i16> undef)
	; SLOW-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I16 = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> undef, <8 x i16> undef)			; SLOW-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I16 = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> undef, <8 x i16> undef)
	; SLOW-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16I16 = call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> undef, <16 x i16> undef)			; SLOW-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16I16 = call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> undef, <16 x i16> undef)
	; SLOW-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %V17I16 = call <17 x i16> @llvm.usub.sat.v17i16(<17 x i16> undef, <17 x i16> undef)			; SLOW-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %V17I16 = call <17 x i16> @llvm.usub.sat.v17i16(<17 x i16> undef, <17 x i16> undef)
	; SLOW-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %V32I16 = call <32 x i16> @llvm.usub.sat.v32i16(<32 x i16> undef, <32 x i16> undef)			; SLOW-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %V32I16 = call <32 x i16> @llvm.usub.sat.v32i16(<32 x i16> undef, <32 x i16> undef)
	Show All 12 Lines
	; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> undef, <2 x i64> undef)			; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> undef, <2 x i64> undef)
	; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = call <4 x i64> @llvm.usub.sat.v4i64(<4 x i64> undef, <4 x i64> undef)			; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = call <4 x i64> @llvm.usub.sat.v4i64(<4 x i64> undef, <4 x i64> undef)
	; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V5I64 = call <5 x i64> @llvm.usub.sat.v5i64(<5 x i64> undef, <5 x i64> undef)			; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V5I64 = call <5 x i64> @llvm.usub.sat.v5i64(<5 x i64> undef, <5 x i64> undef)
	; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V8I64 = call <8 x i64> @llvm.usub.sat.v8i64(<8 x i64> undef, <8 x i64> undef)			; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V8I64 = call <8 x i64> @llvm.usub.sat.v8i64(<8 x i64> undef, <8 x i64> undef)
	; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call i32 @llvm.usub.sat.i32(i32 undef, i32 undef)			; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call i32 @llvm.usub.sat.i32(i32 undef, i32 undef)
	; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <2 x i32> @llvm.usub.sat.v2i32(<2 x i32> undef, <2 x i32> undef)			; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <2 x i32> @llvm.usub.sat.v2i32(<2 x i32> undef, <2 x i32> undef)
	; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> undef, <4 x i32> undef)			; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> undef, <4 x i32> undef)
	; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I32 = call <8 x i32> @llvm.usub.sat.v8i32(<8 x i32> undef, <8 x i32> undef)			; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I32 = call <8 x i32> @llvm.usub.sat.v8i32(<8 x i32> undef, <8 x i32> undef)
	; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V9I32 = call <9 x i32> @llvm.usub.sat.v9i32(<9 x i32> undef, <9 x i32> undef)			; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V9I32 = call <9 x i32> @llvm.usub.sat.v9i32(<9 x i32> undef, <9 x i32> undef)
	; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V16I32 = call <16 x i32> @llvm.usub.sat.v16i32(<16 x i32> undef, <16 x i32> undef)			; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V16I32 = call <16 x i32> @llvm.usub.sat.v16i32(<16 x i32> undef, <16 x i32> undef)
	; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.usub.sat.i16(i16 undef, i16 undef)			; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.usub.sat.i16(i16 undef, i16 undef)
	; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I16 = call <2 x i16> @llvm.usub.sat.v2i16(<2 x i16> undef, <2 x i16> undef)			; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I16 = call <2 x i16> @llvm.usub.sat.v2i16(<2 x i16> undef, <2 x i16> undef)
	; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I16 = call <4 x i16> @llvm.usub.sat.v4i16(<4 x i16> undef, <4 x i16> undef)			; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I16 = call <4 x i16> @llvm.usub.sat.v4i16(<4 x i16> undef, <4 x i16> undef)
	; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I16 = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> undef, <8 x i16> undef)			; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I16 = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> undef, <8 x i16> undef)
	; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I16 = call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> undef, <16 x i16> undef)			; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I16 = call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> undef, <16 x i16> undef)
	; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V17I16 = call <17 x i16> @llvm.usub.sat.v17i16(<17 x i16> undef, <17 x i16> undef)			; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V17I16 = call <17 x i16> @llvm.usub.sat.v17i16(<17 x i16> undef, <17 x i16> undef)
	; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V32I16 = call <32 x i16> @llvm.usub.sat.v32i16(<32 x i16> undef, <32 x i16> undef)			; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V32I16 = call <32 x i16> @llvm.usub.sat.v32i16(<32 x i16> undef, <32 x i16> undef)
	Show All 12 Lines
	; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> undef, <2 x i64> undef)			; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> undef, <2 x i64> undef)
	; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = call <4 x i64> @llvm.usub.sat.v4i64(<4 x i64> undef, <4 x i64> undef)			; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = call <4 x i64> @llvm.usub.sat.v4i64(<4 x i64> undef, <4 x i64> undef)
	; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V5I64 = call <5 x i64> @llvm.usub.sat.v5i64(<5 x i64> undef, <5 x i64> undef)			; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V5I64 = call <5 x i64> @llvm.usub.sat.v5i64(<5 x i64> undef, <5 x i64> undef)
	; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V8I64 = call <8 x i64> @llvm.usub.sat.v8i64(<8 x i64> undef, <8 x i64> undef)			; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V8I64 = call <8 x i64> @llvm.usub.sat.v8i64(<8 x i64> undef, <8 x i64> undef)
	; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call i32 @llvm.usub.sat.i32(i32 undef, i32 undef)			; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call i32 @llvm.usub.sat.i32(i32 undef, i32 undef)
	; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <2 x i32> @llvm.usub.sat.v2i32(<2 x i32> undef, <2 x i32> undef)			; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <2 x i32> @llvm.usub.sat.v2i32(<2 x i32> undef, <2 x i32> undef)
	; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> undef, <4 x i32> undef)			; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> undef, <4 x i32> undef)
	; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I32 = call <8 x i32> @llvm.usub.sat.v8i32(<8 x i32> undef, <8 x i32> undef)			; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I32 = call <8 x i32> @llvm.usub.sat.v8i32(<8 x i32> undef, <8 x i32> undef)
	; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V9I32 = call <9 x i32> @llvm.usub.sat.v9i32(<9 x i32> undef, <9 x i32> undef)			; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V9I32 = call <9 x i32> @llvm.usub.sat.v9i32(<9 x i32> undef, <9 x i32> undef)
	; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V16I32 = call <16 x i32> @llvm.usub.sat.v16i32(<16 x i32> undef, <16 x i32> undef)			; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V16I32 = call <16 x i32> @llvm.usub.sat.v16i32(<16 x i32> undef, <16 x i32> undef)
	; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.usub.sat.i16(i16 undef, i16 undef)			; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.usub.sat.i16(i16 undef, i16 undef)
	; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I16 = call <2 x i16> @llvm.usub.sat.v2i16(<2 x i16> undef, <2 x i16> undef)			; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I16 = call <2 x i16> @llvm.usub.sat.v2i16(<2 x i16> undef, <2 x i16> undef)
	; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I16 = call <4 x i16> @llvm.usub.sat.v4i16(<4 x i16> undef, <4 x i16> undef)			; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I16 = call <4 x i16> @llvm.usub.sat.v4i16(<4 x i16> undef, <4 x i16> undef)
	; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I16 = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> undef, <8 x i16> undef)			; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I16 = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> undef, <8 x i16> undef)
	; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I16 = call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> undef, <16 x i16> undef)			; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I16 = call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> undef, <16 x i16> undef)
	; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V17I16 = call <17 x i16> @llvm.usub.sat.v17i16(<17 x i16> undef, <17 x i16> undef)			; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V17I16 = call <17 x i16> @llvm.usub.sat.v17i16(<17 x i16> undef, <17 x i16> undef)
	; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V32I16 = call <32 x i16> @llvm.usub.sat.v32i16(<32 x i16> undef, <32 x i16> undef)			; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V32I16 = call <32 x i16> @llvm.usub.sat.v32i16(<32 x i16> undef, <32 x i16> undef)
	▲ Show 20 Lines • Show All 42 Lines • Show Last 20 Lines

llvm/test/Analysis/CostModel/AMDGPU/fadd.ll

	Show All 9 Lines
	define amdgpu_kernel void @fadd_f32() #0 {			define amdgpu_kernel void @fadd_f32() #0 {
	; GFX90A-FASTF64-LABEL: 'fadd_f32'			; GFX90A-FASTF64-LABEL: 'fadd_f32'
	; GFX90A-FASTF64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f32 = fadd float undef, undef			; GFX90A-FASTF64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f32 = fadd float undef, undef
	; GFX90A-FASTF64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f32 = fadd <2 x float> undef, undef			; GFX90A-FASTF64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f32 = fadd <2 x float> undef, undef
	; GFX90A-FASTF64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v3f32 = fadd <3 x float> undef, undef			; GFX90A-FASTF64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v3f32 = fadd <3 x float> undef, undef
	; GFX90A-FASTF64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4f32 = fadd <4 x float> undef, undef			; GFX90A-FASTF64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4f32 = fadd <4 x float> undef, undef
	; GFX90A-FASTF64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v5f32 = fadd <5 x float> undef, undef			; GFX90A-FASTF64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v5f32 = fadd <5 x float> undef, undef
	; GFX90A-FASTF64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8f32 = fadd <8 x float> undef, undef			; GFX90A-FASTF64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8f32 = fadd <8 x float> undef, undef
	; GFX90A-FASTF64-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %v9f32 = fadd <9 x float> undef, undef			; GFX90A-FASTF64-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %v9f32 = fadd <9 x float> undef, undef
	; GFX90A-FASTF64-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void			; GFX90A-FASTF64-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void
	;			;
	; NOPACKEDF32-LABEL: 'fadd_f32'			; NOPACKEDF32-LABEL: 'fadd_f32'
	; NOPACKEDF32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f32 = fadd float undef, undef			; NOPACKEDF32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f32 = fadd float undef, undef
	; NOPACKEDF32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2f32 = fadd <2 x float> undef, undef			; NOPACKEDF32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2f32 = fadd <2 x float> undef, undef
	; NOPACKEDF32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v3f32 = fadd <3 x float> undef, undef			; NOPACKEDF32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v3f32 = fadd <3 x float> undef, undef
	; NOPACKEDF32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4f32 = fadd <4 x float> undef, undef			; NOPACKEDF32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4f32 = fadd <4 x float> undef, undef
	; NOPACKEDF32-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v5f32 = fadd <5 x float> undef, undef			; NOPACKEDF32-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v5f32 = fadd <5 x float> undef, undef
	; NOPACKEDF32-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8f32 = fadd <8 x float> undef, undef			; NOPACKEDF32-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8f32 = fadd <8 x float> undef, undef
	; NOPACKEDF32-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v9f32 = fadd <9 x float> undef, undef			; NOPACKEDF32-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %v9f32 = fadd <9 x float> undef, undef
	; NOPACKEDF32-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void			; NOPACKEDF32-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void
	;			;
	; GFX90A-FASTF64-SIZE-LABEL: 'fadd_f32'			; GFX90A-FASTF64-SIZE-LABEL: 'fadd_f32'
	; GFX90A-FASTF64-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f32 = fadd float undef, undef			; GFX90A-FASTF64-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f32 = fadd float undef, undef
	; GFX90A-FASTF64-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f32 = fadd <2 x float> undef, undef			; GFX90A-FASTF64-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f32 = fadd <2 x float> undef, undef
	; GFX90A-FASTF64-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v3f32 = fadd <3 x float> undef, undef			; GFX90A-FASTF64-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v3f32 = fadd <3 x float> undef, undef
	; GFX90A-FASTF64-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4f32 = fadd <4 x float> undef, undef			; GFX90A-FASTF64-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4f32 = fadd <4 x float> undef, undef
	; GFX90A-FASTF64-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v5f32 = fadd <5 x float> undef, undef			; GFX90A-FASTF64-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v5f32 = fadd <5 x float> undef, undef
	; GFX90A-FASTF64-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8f32 = fadd <8 x float> undef, undef			; GFX90A-FASTF64-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8f32 = fadd <8 x float> undef, undef
	; GFX90A-FASTF64-SIZE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %v9f32 = fadd <9 x float> undef, undef			; GFX90A-FASTF64-SIZE-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %v9f32 = fadd <9 x float> undef, undef
	; GFX90A-FASTF64-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void			; GFX90A-FASTF64-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
	;			;
	; NOPACKEDF32-SIZE-LABEL: 'fadd_f32'			; NOPACKEDF32-SIZE-LABEL: 'fadd_f32'
	; NOPACKEDF32-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f32 = fadd float undef, undef			; NOPACKEDF32-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f32 = fadd float undef, undef
	; NOPACKEDF32-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2f32 = fadd <2 x float> undef, undef			; NOPACKEDF32-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2f32 = fadd <2 x float> undef, undef
	; NOPACKEDF32-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v3f32 = fadd <3 x float> undef, undef			; NOPACKEDF32-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v3f32 = fadd <3 x float> undef, undef
	; NOPACKEDF32-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4f32 = fadd <4 x float> undef, undef			; NOPACKEDF32-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4f32 = fadd <4 x float> undef, undef
	; NOPACKEDF32-SIZE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v5f32 = fadd <5 x float> undef, undef			; NOPACKEDF32-SIZE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v5f32 = fadd <5 x float> undef, undef
	; NOPACKEDF32-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8f32 = fadd <8 x float> undef, undef			; NOPACKEDF32-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8f32 = fadd <8 x float> undef, undef
	; NOPACKEDF32-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v9f32 = fadd <9 x float> undef, undef			; NOPACKEDF32-SIZE-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %v9f32 = fadd <9 x float> undef, undef
	; NOPACKEDF32-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void			; NOPACKEDF32-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
	;			;
	%f32 = fadd float undef, undef			%f32 = fadd float undef, undef
	%v2f32 = fadd <2 x float> undef, undef			%v2f32 = fadd <2 x float> undef, undef
	%v3f32 = fadd <3 x float> undef, undef			%v3f32 = fadd <3 x float> undef, undef
	%v4f32 = fadd <4 x float> undef, undef			%v4f32 = fadd <4 x float> undef, undef
	%v5f32 = fadd <5 x float> undef, undef			%v5f32 = fadd <5 x float> undef, undef
	%v8f32 = fadd <8 x float> undef, undef			%v8f32 = fadd <8 x float> undef, undef
	▲ Show 20 Lines • Show All 105 Lines • Show Last 20 Lines

llvm/test/Analysis/CostModel/AMDGPU/fdiv.ll

	Show All 14 Lines
	define amdgpu_kernel void @fdiv_f32_ieee() #0 {			define amdgpu_kernel void @fdiv_f32_ieee() #0 {
	; ALL-LABEL: 'fdiv_f32_ieee'			; ALL-LABEL: 'fdiv_f32_ieee'
	; ALL-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %f32 = fdiv float undef, undef			; ALL-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %f32 = fdiv float undef, undef
	; ALL-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %v2f32 = fdiv <2 x float> undef, undef			; ALL-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %v2f32 = fdiv <2 x float> undef, undef
	; ALL-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %v3f32 = fdiv <3 x float> undef, undef			; ALL-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %v3f32 = fdiv <3 x float> undef, undef
	; ALL-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %v4f32 = fdiv <4 x float> undef, undef			; ALL-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %v4f32 = fdiv <4 x float> undef, undef
	; ALL-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %v5f32 = fdiv <5 x float> undef, undef			; ALL-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %v5f32 = fdiv <5 x float> undef, undef
	; ALL-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %v8f32 = fdiv <8 x float> undef, undef			; ALL-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %v8f32 = fdiv <8 x float> undef, undef
	; ALL-NEXT: Cost Model: Found an estimated cost of 672 for instruction: %v9f32 = fdiv <9 x float> undef, undef			; ALL-NEXT: Cost Model: Found an estimated cost of 378 for instruction: %v9f32 = fdiv <9 x float> undef, undef
	; ALL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void			; ALL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void
	;			;
	; ALL-SIZE-LABEL: 'fdiv_f32_ieee'			; ALL-SIZE-LABEL: 'fdiv_f32_ieee'
	; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %f32 = fdiv float undef, undef			; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %f32 = fdiv float undef, undef
	; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %v2f32 = fdiv <2 x float> undef, undef			; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %v2f32 = fdiv <2 x float> undef, undef
	; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %v3f32 = fdiv <3 x float> undef, undef			; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %v3f32 = fdiv <3 x float> undef, undef
	; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v4f32 = fdiv <4 x float> undef, undef			; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v4f32 = fdiv <4 x float> undef, undef
	; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %v5f32 = fdiv <5 x float> undef, undef			; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %v5f32 = fdiv <5 x float> undef, undef
	; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %v8f32 = fdiv <8 x float> undef, undef			; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %v8f32 = fdiv <8 x float> undef, undef
	; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 576 for instruction: %v9f32 = fdiv <9 x float> undef, undef			; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 324 for instruction: %v9f32 = fdiv <9 x float> undef, undef
	; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void			; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
	;			;
	%f32 = fdiv float undef, undef			%f32 = fdiv float undef, undef
	%v2f32 = fdiv <2 x float> undef, undef			%v2f32 = fdiv <2 x float> undef, undef
	%v3f32 = fdiv <3 x float> undef, undef			%v3f32 = fdiv <3 x float> undef, undef
	%v4f32 = fdiv <4 x float> undef, undef			%v4f32 = fdiv <4 x float> undef, undef
	%v5f32 = fdiv <5 x float> undef, undef			%v5f32 = fdiv <5 x float> undef, undef
	%v8f32 = fdiv <8 x float> undef, undef			%v8f32 = fdiv <8 x float> undef, undef
	%v9f32 = fdiv <9 x float> undef, undef			%v9f32 = fdiv <9 x float> undef, undef
	ret void			ret void
	}			}

	define amdgpu_kernel void @fdiv_f32_ftzdaz() #1 {			define amdgpu_kernel void @fdiv_f32_ftzdaz() #1 {
	; ALL-LABEL: 'fdiv_f32_ftzdaz'			; ALL-LABEL: 'fdiv_f32_ftzdaz'
	; ALL-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %f32 = fdiv float undef, undef			; ALL-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %f32 = fdiv float undef, undef
	; ALL-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v2f32 = fdiv <2 x float> undef, undef			; ALL-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v2f32 = fdiv <2 x float> undef, undef
	; ALL-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v3f32 = fdiv <3 x float> undef, undef			; ALL-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v3f32 = fdiv <3 x float> undef, undef
	; ALL-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %v4f32 = fdiv <4 x float> undef, undef			; ALL-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %v4f32 = fdiv <4 x float> undef, undef
	; ALL-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %v5f32 = fdiv <5 x float> undef, undef			; ALL-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %v5f32 = fdiv <5 x float> undef, undef
	; ALL-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %v8f32 = fdiv <8 x float> undef, undef			; ALL-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %v8f32 = fdiv <8 x float> undef, undef
	; ALL-NEXT: Cost Model: Found an estimated cost of 768 for instruction: %v9f32 = fdiv <9 x float> undef, undef			; ALL-NEXT: Cost Model: Found an estimated cost of 432 for instruction: %v9f32 = fdiv <9 x float> undef, undef
	; ALL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void			; ALL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void
	;			;
	; ALL-SIZE-LABEL: 'fdiv_f32_ftzdaz'			; ALL-SIZE-LABEL: 'fdiv_f32_ftzdaz'
	; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %f32 = fdiv float undef, undef			; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %f32 = fdiv float undef, undef
	; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %v2f32 = fdiv <2 x float> undef, undef			; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %v2f32 = fdiv <2 x float> undef, undef
	; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %v3f32 = fdiv <3 x float> undef, undef			; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %v3f32 = fdiv <3 x float> undef, undef
	; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %v4f32 = fdiv <4 x float> undef, undef			; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %v4f32 = fdiv <4 x float> undef, undef
	; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %v5f32 = fdiv <5 x float> undef, undef			; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %v5f32 = fdiv <5 x float> undef, undef
	; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %v8f32 = fdiv <8 x float> undef, undef			; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %v8f32 = fdiv <8 x float> undef, undef
	; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 672 for instruction: %v9f32 = fdiv <9 x float> undef, undef			; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 378 for instruction: %v9f32 = fdiv <9 x float> undef, undef
	; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void			; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
	;			;
	%f32 = fdiv float undef, undef			%f32 = fdiv float undef, undef
	%v2f32 = fdiv <2 x float> undef, undef			%v2f32 = fdiv <2 x float> undef, undef
	%v3f32 = fdiv <3 x float> undef, undef			%v3f32 = fdiv <3 x float> undef, undef
	%v4f32 = fdiv <4 x float> undef, undef			%v4f32 = fdiv <4 x float> undef, undef
	%v5f32 = fdiv <5 x float> undef, undef			%v5f32 = fdiv <5 x float> undef, undef
	%v8f32 = fdiv <8 x float> undef, undef			%v8f32 = fdiv <8 x float> undef, undef
	▲ Show 20 Lines • Show All 609 Lines • Show Last 20 Lines

llvm/test/Analysis/CostModel/AMDGPU/fma.ll

	Show All 11 Lines
	define amdgpu_kernel void @fma_f32() #0 {			define amdgpu_kernel void @fma_f32() #0 {
	; SLOWF64-LABEL: 'fma_f32'			; SLOWF64-LABEL: 'fma_f32'
	; SLOWF64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %f32 = call float @llvm.fma.f32(float undef, float undef, float undef) #2			; SLOWF64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %f32 = call float @llvm.fma.f32(float undef, float undef, float undef) #2
	; SLOWF64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2f32 = call <2 x float> @llvm.fma.v2f32(<2 x float> undef, <2 x float> undef, <2 x float> undef) #2			; SLOWF64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2f32 = call <2 x float> @llvm.fma.v2f32(<2 x float> undef, <2 x float> undef, <2 x float> undef) #2
	; SLOWF64-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v3f32 = call <3 x float> @llvm.fma.v3f32(<3 x float> undef, <3 x float> undef, <3 x float> undef) #2			; SLOWF64-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v3f32 = call <3 x float> @llvm.fma.v3f32(<3 x float> undef, <3 x float> undef, <3 x float> undef) #2
	; SLOWF64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4f32 = call <4 x float> @llvm.fma.v4f32(<4 x float> undef, <4 x float> undef, <4 x float> undef) #2			; SLOWF64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4f32 = call <4 x float> @llvm.fma.v4f32(<4 x float> undef, <4 x float> undef, <4 x float> undef) #2
	; SLOWF64-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v5f32 = call <5 x float> @llvm.fma.v5f32(<5 x float> undef, <5 x float> undef, <5 x float> undef) #2			; SLOWF64-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v5f32 = call <5 x float> @llvm.fma.v5f32(<5 x float> undef, <5 x float> undef, <5 x float> undef) #2
	; SLOWF64-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v8f32 = call <8 x float> @llvm.fma.v8f32(<8 x float> undef, <8 x float> undef, <8 x float> undef) #2			; SLOWF64-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v8f32 = call <8 x float> @llvm.fma.v8f32(<8 x float> undef, <8 x float> undef, <8 x float> undef) #2
	; SLOWF64-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %v9f32 = call <9 x float> @llvm.fma.v9f32(<9 x float> undef, <9 x float> undef, <9 x float> undef) #2			; SLOWF64-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %v9f32 = call <9 x float> @llvm.fma.v9f32(<9 x float> undef, <9 x float> undef, <9 x float> undef) #2
	; SLOWF64-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void			; SLOWF64-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void
	;			;
	; FASTF64-LABEL: 'fma_f32'			; FASTF64-LABEL: 'fma_f32'
	; FASTF64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %f32 = call float @llvm.fma.f32(float undef, float undef, float undef) #2			; FASTF64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %f32 = call float @llvm.fma.f32(float undef, float undef, float undef) #2
	; FASTF64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2f32 = call <2 x float> @llvm.fma.v2f32(<2 x float> undef, <2 x float> undef, <2 x float> undef) #2			; FASTF64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2f32 = call <2 x float> @llvm.fma.v2f32(<2 x float> undef, <2 x float> undef, <2 x float> undef) #2
	; FASTF64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v3f32 = call <3 x float> @llvm.fma.v3f32(<3 x float> undef, <3 x float> undef, <3 x float> undef) #2			; FASTF64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v3f32 = call <3 x float> @llvm.fma.v3f32(<3 x float> undef, <3 x float> undef, <3 x float> undef) #2
	; FASTF64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4f32 = call <4 x float> @llvm.fma.v4f32(<4 x float> undef, <4 x float> undef, <4 x float> undef) #2			; FASTF64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4f32 = call <4 x float> @llvm.fma.v4f32(<4 x float> undef, <4 x float> undef, <4 x float> undef) #2
	; FASTF64-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v5f32 = call <5 x float> @llvm.fma.v5f32(<5 x float> undef, <5 x float> undef, <5 x float> undef) #2			; FASTF64-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v5f32 = call <5 x float> @llvm.fma.v5f32(<5 x float> undef, <5 x float> undef, <5 x float> undef) #2
	; FASTF64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8f32 = call <8 x float> @llvm.fma.v8f32(<8 x float> undef, <8 x float> undef, <8 x float> undef) #2			; FASTF64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8f32 = call <8 x float> @llvm.fma.v8f32(<8 x float> undef, <8 x float> undef, <8 x float> undef) #2
	; FASTF64-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v9f32 = call <9 x float> @llvm.fma.v9f32(<9 x float> undef, <9 x float> undef, <9 x float> undef) #2			; FASTF64-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %v9f32 = call <9 x float> @llvm.fma.v9f32(<9 x float> undef, <9 x float> undef, <9 x float> undef) #2
	; FASTF64-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void			; FASTF64-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void
	;			;
	; SLOW-LABEL: 'fma_f32'			; SLOW-LABEL: 'fma_f32'
	; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %f32 = call float @llvm.fma.f32(float undef, float undef, float undef) #2			; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %f32 = call float @llvm.fma.f32(float undef, float undef, float undef) #2
	; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v2f32 = call <2 x float> @llvm.fma.v2f32(<2 x float> undef, <2 x float> undef, <2 x float> undef) #2			; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v2f32 = call <2 x float> @llvm.fma.v2f32(<2 x float> undef, <2 x float> undef, <2 x float> undef) #2
	; SLOW-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v3f32 = call <3 x float> @llvm.fma.v3f32(<3 x float> undef, <3 x float> undef, <3 x float> undef) #2			; SLOW-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v3f32 = call <3 x float> @llvm.fma.v3f32(<3 x float> undef, <3 x float> undef, <3 x float> undef) #2
	; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v4f32 = call <4 x float> @llvm.fma.v4f32(<4 x float> undef, <4 x float> undef, <4 x float> undef) #2			; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v4f32 = call <4 x float> @llvm.fma.v4f32(<4 x float> undef, <4 x float> undef, <4 x float> undef) #2
	; SLOW-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v5f32 = call <5 x float> @llvm.fma.v5f32(<5 x float> undef, <5 x float> undef, <5 x float> undef) #2			; SLOW-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v5f32 = call <5 x float> @llvm.fma.v5f32(<5 x float> undef, <5 x float> undef, <5 x float> undef) #2
	; SLOW-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v8f32 = call <8 x float> @llvm.fma.v8f32(<8 x float> undef, <8 x float> undef, <8 x float> undef) #2			; SLOW-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v8f32 = call <8 x float> @llvm.fma.v8f32(<8 x float> undef, <8 x float> undef, <8 x float> undef) #2
	; SLOW-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %v9f32 = call <9 x float> @llvm.fma.v9f32(<9 x float> undef, <9 x float> undef, <9 x float> undef) #2			; SLOW-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %v9f32 = call <9 x float> @llvm.fma.v9f32(<9 x float> undef, <9 x float> undef, <9 x float> undef) #2
	; SLOW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void			; SLOW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void
	;			;
	; SLOWF64-SIZE-LABEL: 'fma_f32'			; SLOWF64-SIZE-LABEL: 'fma_f32'
	; SLOWF64-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %f32 = call float @llvm.fma.f32(float undef, float undef, float undef) #2			; SLOWF64-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %f32 = call float @llvm.fma.f32(float undef, float undef, float undef) #2
	; SLOWF64-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2f32 = call <2 x float> @llvm.fma.v2f32(<2 x float> undef, <2 x float> undef, <2 x float> undef) #2			; SLOWF64-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2f32 = call <2 x float> @llvm.fma.v2f32(<2 x float> undef, <2 x float> undef, <2 x float> undef) #2
	; SLOWF64-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v3f32 = call <3 x float> @llvm.fma.v3f32(<3 x float> undef, <3 x float> undef, <3 x float> undef) #2			; SLOWF64-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v3f32 = call <3 x float> @llvm.fma.v3f32(<3 x float> undef, <3 x float> undef, <3 x float> undef) #2
	; SLOWF64-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4f32 = call <4 x float> @llvm.fma.v4f32(<4 x float> undef, <4 x float> undef, <4 x float> undef) #2			; SLOWF64-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4f32 = call <4 x float> @llvm.fma.v4f32(<4 x float> undef, <4 x float> undef, <4 x float> undef) #2
	; SLOWF64-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v5f32 = call <5 x float> @llvm.fma.v5f32(<5 x float> undef, <5 x float> undef, <5 x float> undef) #2			; SLOWF64-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v5f32 = call <5 x float> @llvm.fma.v5f32(<5 x float> undef, <5 x float> undef, <5 x float> undef) #2
	; SLOWF64-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v8f32 = call <8 x float> @llvm.fma.v8f32(<8 x float> undef, <8 x float> undef, <8 x float> undef) #2			; SLOWF64-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v8f32 = call <8 x float> @llvm.fma.v8f32(<8 x float> undef, <8 x float> undef, <8 x float> undef) #2
	; SLOWF64-SIZE-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %v9f32 = call <9 x float> @llvm.fma.v9f32(<9 x float> undef, <9 x float> undef, <9 x float> undef) #2			; SLOWF64-SIZE-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %v9f32 = call <9 x float> @llvm.fma.v9f32(<9 x float> undef, <9 x float> undef, <9 x float> undef) #2
	; SLOWF64-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void			; SLOWF64-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
	;			;
	; FASTF64-SIZE-LABEL: 'fma_f32'			; FASTF64-SIZE-LABEL: 'fma_f32'
	; FASTF64-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %f32 = call float @llvm.fma.f32(float undef, float undef, float undef) #2			; FASTF64-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %f32 = call float @llvm.fma.f32(float undef, float undef, float undef) #2
	; FASTF64-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2f32 = call <2 x float> @llvm.fma.v2f32(<2 x float> undef, <2 x float> undef, <2 x float> undef) #2			; FASTF64-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2f32 = call <2 x float> @llvm.fma.v2f32(<2 x float> undef, <2 x float> undef, <2 x float> undef) #2
	; FASTF64-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v3f32 = call <3 x float> @llvm.fma.v3f32(<3 x float> undef, <3 x float> undef, <3 x float> undef) #2			; FASTF64-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v3f32 = call <3 x float> @llvm.fma.v3f32(<3 x float> undef, <3 x float> undef, <3 x float> undef) #2
	; FASTF64-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4f32 = call <4 x float> @llvm.fma.v4f32(<4 x float> undef, <4 x float> undef, <4 x float> undef) #2			; FASTF64-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4f32 = call <4 x float> @llvm.fma.v4f32(<4 x float> undef, <4 x float> undef, <4 x float> undef) #2
	; FASTF64-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v5f32 = call <5 x float> @llvm.fma.v5f32(<5 x float> undef, <5 x float> undef, <5 x float> undef) #2			; FASTF64-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v5f32 = call <5 x float> @llvm.fma.v5f32(<5 x float> undef, <5 x float> undef, <5 x float> undef) #2
	; FASTF64-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8f32 = call <8 x float> @llvm.fma.v8f32(<8 x float> undef, <8 x float> undef, <8 x float> undef) #2			; FASTF64-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8f32 = call <8 x float> @llvm.fma.v8f32(<8 x float> undef, <8 x float> undef, <8 x float> undef) #2
	; FASTF64-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v9f32 = call <9 x float> @llvm.fma.v9f32(<9 x float> undef, <9 x float> undef, <9 x float> undef) #2			; FASTF64-SIZE-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %v9f32 = call <9 x float> @llvm.fma.v9f32(<9 x float> undef, <9 x float> undef, <9 x float> undef) #2
	; FASTF64-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void			; FASTF64-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
	;			;
	; SLOW-SIZE-LABEL: 'fma_f32'			; SLOW-SIZE-LABEL: 'fma_f32'
	; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %f32 = call float @llvm.fma.f32(float undef, float undef, float undef) #2			; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %f32 = call float @llvm.fma.f32(float undef, float undef, float undef) #2
	; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2f32 = call <2 x float> @llvm.fma.v2f32(<2 x float> undef, <2 x float> undef, <2 x float> undef) #2			; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2f32 = call <2 x float> @llvm.fma.v2f32(<2 x float> undef, <2 x float> undef, <2 x float> undef) #2
	; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v3f32 = call <3 x float> @llvm.fma.v3f32(<3 x float> undef, <3 x float> undef, <3 x float> undef) #2			; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v3f32 = call <3 x float> @llvm.fma.v3f32(<3 x float> undef, <3 x float> undef, <3 x float> undef) #2
	; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4f32 = call <4 x float> @llvm.fma.v4f32(<4 x float> undef, <4 x float> undef, <4 x float> undef) #2			; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4f32 = call <4 x float> @llvm.fma.v4f32(<4 x float> undef, <4 x float> undef, <4 x float> undef) #2
	; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v5f32 = call <5 x float> @llvm.fma.v5f32(<5 x float> undef, <5 x float> undef, <5 x float> undef) #2			; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v5f32 = call <5 x float> @llvm.fma.v5f32(<5 x float> undef, <5 x float> undef, <5 x float> undef) #2
	; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v8f32 = call <8 x float> @llvm.fma.v8f32(<8 x float> undef, <8 x float> undef, <8 x float> undef) #2			; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v8f32 = call <8 x float> @llvm.fma.v8f32(<8 x float> undef, <8 x float> undef, <8 x float> undef) #2
	; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %v9f32 = call <9 x float> @llvm.fma.v9f32(<9 x float> undef, <9 x float> undef, <9 x float> undef) #2			; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %v9f32 = call <9 x float> @llvm.fma.v9f32(<9 x float> undef, <9 x float> undef, <9 x float> undef) #2
	; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void			; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
	;			;
	%f32 = call float @llvm.fma.f32(float undef, float undef, float undef) #1			%f32 = call float @llvm.fma.f32(float undef, float undef, float undef) #1
	%v2f32 = call <2 x float> @llvm.fma.v2f32(<2 x float> undef, <2 x float> undef, <2 x float> undef) #1			%v2f32 = call <2 x float> @llvm.fma.v2f32(<2 x float> undef, <2 x float> undef, <2 x float> undef) #1
	%v3f32 = call <3 x float> @llvm.fma.v3f32(<3 x float> undef, <3 x float> undef, <3 x float> undef) #1			%v3f32 = call <3 x float> @llvm.fma.v3f32(<3 x float> undef, <3 x float> undef, <3 x float> undef) #1
	%v4f32 = call <4 x float> @llvm.fma.v4f32(<4 x float> undef, <4 x float> undef, <4 x float> undef) #1			%v4f32 = call <4 x float> @llvm.fma.v4f32(<4 x float> undef, <4 x float> undef, <4 x float> undef) #1
	%v5f32 = call <5 x float> @llvm.fma.v5f32(<5 x float> undef, <5 x float> undef, <5 x float> undef) #1			%v5f32 = call <5 x float> @llvm.fma.v5f32(<5 x float> undef, <5 x float> undef, <5 x float> undef) #1
	%v8f32 = call <8 x float> @llvm.fma.v8f32(<8 x float> undef, <8 x float> undef, <8 x float> undef) #1			%v8f32 = call <8 x float> @llvm.fma.v8f32(<8 x float> undef, <8 x float> undef, <8 x float> undef) #1
	▲ Show 20 Lines • Show All 136 Lines • Show Last 20 Lines

llvm/test/Analysis/CostModel/AMDGPU/fmul.ll

	Show All 9 Lines
	define amdgpu_kernel void @fmul_f32() #0 {			define amdgpu_kernel void @fmul_f32() #0 {
	; GFX90A-FASTF64-LABEL: 'fmul_f32'			; GFX90A-FASTF64-LABEL: 'fmul_f32'
	; GFX90A-FASTF64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f32 = fmul float undef, undef			; GFX90A-FASTF64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f32 = fmul float undef, undef
	; GFX90A-FASTF64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f32 = fmul <2 x float> undef, undef			; GFX90A-FASTF64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f32 = fmul <2 x float> undef, undef
	; GFX90A-FASTF64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v3f32 = fmul <3 x float> undef, undef			; GFX90A-FASTF64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v3f32 = fmul <3 x float> undef, undef
	; GFX90A-FASTF64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4f32 = fmul <4 x float> undef, undef			; GFX90A-FASTF64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4f32 = fmul <4 x float> undef, undef
	; GFX90A-FASTF64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v5f32 = fmul <5 x float> undef, undef			; GFX90A-FASTF64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v5f32 = fmul <5 x float> undef, undef
	; GFX90A-FASTF64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8f32 = fmul <8 x float> undef, undef			; GFX90A-FASTF64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8f32 = fmul <8 x float> undef, undef
	; GFX90A-FASTF64-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %v9f32 = fmul <9 x float> undef, undef			; GFX90A-FASTF64-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %v9f32 = fmul <9 x float> undef, undef
	; GFX90A-FASTF64-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void			; GFX90A-FASTF64-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void
	;			;
	; F32-LABEL: 'fmul_f32'			; F32-LABEL: 'fmul_f32'
	; F32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f32 = fmul float undef, undef			; F32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f32 = fmul float undef, undef
	; F32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2f32 = fmul <2 x float> undef, undef			; F32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2f32 = fmul <2 x float> undef, undef
	; F32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v3f32 = fmul <3 x float> undef, undef			; F32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v3f32 = fmul <3 x float> undef, undef
	; F32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4f32 = fmul <4 x float> undef, undef			; F32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4f32 = fmul <4 x float> undef, undef
	; F32-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v5f32 = fmul <5 x float> undef, undef			; F32-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v5f32 = fmul <5 x float> undef, undef
	; F32-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8f32 = fmul <8 x float> undef, undef			; F32-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8f32 = fmul <8 x float> undef, undef
	; F32-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v9f32 = fmul <9 x float> undef, undef			; F32-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %v9f32 = fmul <9 x float> undef, undef
	; F32-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void			; F32-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void
	;			;
	; GFX90A-SIZE-LABEL: 'fmul_f32'			; GFX90A-SIZE-LABEL: 'fmul_f32'
	; GFX90A-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f32 = fmul float undef, undef			; GFX90A-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f32 = fmul float undef, undef
	; GFX90A-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f32 = fmul <2 x float> undef, undef			; GFX90A-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f32 = fmul <2 x float> undef, undef
	; GFX90A-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v3f32 = fmul <3 x float> undef, undef			; GFX90A-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v3f32 = fmul <3 x float> undef, undef
	; GFX90A-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4f32 = fmul <4 x float> undef, undef			; GFX90A-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4f32 = fmul <4 x float> undef, undef
	; GFX90A-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v5f32 = fmul <5 x float> undef, undef			; GFX90A-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v5f32 = fmul <5 x float> undef, undef
	; GFX90A-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8f32 = fmul <8 x float> undef, undef			; GFX90A-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8f32 = fmul <8 x float> undef, undef
	; GFX90A-SIZE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %v9f32 = fmul <9 x float> undef, undef			; GFX90A-SIZE-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %v9f32 = fmul <9 x float> undef, undef
	; GFX90A-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void			; GFX90A-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
	;			;
	; SIZE-LABEL: 'fmul_f32'			; SIZE-LABEL: 'fmul_f32'
	; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f32 = fmul float undef, undef			; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f32 = fmul float undef, undef
	; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2f32 = fmul <2 x float> undef, undef			; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2f32 = fmul <2 x float> undef, undef
	; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v3f32 = fmul <3 x float> undef, undef			; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v3f32 = fmul <3 x float> undef, undef
	; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4f32 = fmul <4 x float> undef, undef			; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4f32 = fmul <4 x float> undef, undef
	; SIZE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v5f32 = fmul <5 x float> undef, undef			; SIZE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v5f32 = fmul <5 x float> undef, undef
	; SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8f32 = fmul <8 x float> undef, undef			; SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8f32 = fmul <8 x float> undef, undef
	; SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v9f32 = fmul <9 x float> undef, undef			; SIZE-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %v9f32 = fmul <9 x float> undef, undef
	; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void			; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
	;			;
	%f32 = fmul float undef, undef			%f32 = fmul float undef, undef
	%v2f32 = fmul <2 x float> undef, undef			%v2f32 = fmul <2 x float> undef, undef
	%v3f32 = fmul <3 x float> undef, undef			%v3f32 = fmul <3 x float> undef, undef
	%v4f32 = fmul <4 x float> undef, undef			%v4f32 = fmul <4 x float> undef, undef
	%v5f32 = fmul <5 x float> undef, undef			%v5f32 = fmul <5 x float> undef, undef
	%v8f32 = fmul <8 x float> undef, undef			%v8f32 = fmul <8 x float> undef, undef
	▲ Show 20 Lines • Show All 105 Lines • Show Last 20 Lines

llvm/test/Analysis/CostModel/AMDGPU/fsub.ll

	Show All 9 Lines
	define amdgpu_kernel void @fsub_f32() #0 {			define amdgpu_kernel void @fsub_f32() #0 {
	; GFX90A-FASTF64-LABEL: 'fsub_f32'			; GFX90A-FASTF64-LABEL: 'fsub_f32'
	; GFX90A-FASTF64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f32 = fsub float undef, undef			; GFX90A-FASTF64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f32 = fsub float undef, undef
	; GFX90A-FASTF64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f32 = fsub <2 x float> undef, undef			; GFX90A-FASTF64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f32 = fsub <2 x float> undef, undef
	; GFX90A-FASTF64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v3f32 = fsub <3 x float> undef, undef			; GFX90A-FASTF64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v3f32 = fsub <3 x float> undef, undef
	; GFX90A-FASTF64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4f32 = fsub <4 x float> undef, undef			; GFX90A-FASTF64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4f32 = fsub <4 x float> undef, undef
	; GFX90A-FASTF64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v5f32 = fsub <5 x float> undef, undef			; GFX90A-FASTF64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v5f32 = fsub <5 x float> undef, undef
	; GFX90A-FASTF64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8f32 = fsub <8 x float> undef, undef			; GFX90A-FASTF64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8f32 = fsub <8 x float> undef, undef
	; GFX90A-FASTF64-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %v9f32 = fsub <9 x float> undef, undef			; GFX90A-FASTF64-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %v9f32 = fsub <9 x float> undef, undef
	; GFX90A-FASTF64-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void			; GFX90A-FASTF64-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void
	;			;
	; NOPACKEDF32-LABEL: 'fsub_f32'			; NOPACKEDF32-LABEL: 'fsub_f32'
	; NOPACKEDF32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f32 = fsub float undef, undef			; NOPACKEDF32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f32 = fsub float undef, undef
	; NOPACKEDF32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2f32 = fsub <2 x float> undef, undef			; NOPACKEDF32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2f32 = fsub <2 x float> undef, undef
	; NOPACKEDF32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v3f32 = fsub <3 x float> undef, undef			; NOPACKEDF32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v3f32 = fsub <3 x float> undef, undef
	; NOPACKEDF32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4f32 = fsub <4 x float> undef, undef			; NOPACKEDF32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4f32 = fsub <4 x float> undef, undef
	; NOPACKEDF32-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v5f32 = fsub <5 x float> undef, undef			; NOPACKEDF32-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v5f32 = fsub <5 x float> undef, undef
	; NOPACKEDF32-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8f32 = fsub <8 x float> undef, undef			; NOPACKEDF32-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8f32 = fsub <8 x float> undef, undef
	; NOPACKEDF32-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v9f32 = fsub <9 x float> undef, undef			; NOPACKEDF32-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %v9f32 = fsub <9 x float> undef, undef
	; NOPACKEDF32-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void			; NOPACKEDF32-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void
	;			;
	; GFX90A-FASTF64-SIZE-LABEL: 'fsub_f32'			; GFX90A-FASTF64-SIZE-LABEL: 'fsub_f32'
	; GFX90A-FASTF64-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f32 = fsub float undef, undef			; GFX90A-FASTF64-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f32 = fsub float undef, undef
	; GFX90A-FASTF64-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f32 = fsub <2 x float> undef, undef			; GFX90A-FASTF64-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f32 = fsub <2 x float> undef, undef
	; GFX90A-FASTF64-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v3f32 = fsub <3 x float> undef, undef			; GFX90A-FASTF64-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v3f32 = fsub <3 x float> undef, undef
	; GFX90A-FASTF64-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4f32 = fsub <4 x float> undef, undef			; GFX90A-FASTF64-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4f32 = fsub <4 x float> undef, undef
	; GFX90A-FASTF64-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v5f32 = fsub <5 x float> undef, undef			; GFX90A-FASTF64-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v5f32 = fsub <5 x float> undef, undef
	; GFX90A-FASTF64-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8f32 = fsub <8 x float> undef, undef			; GFX90A-FASTF64-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8f32 = fsub <8 x float> undef, undef
	; GFX90A-FASTF64-SIZE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %v9f32 = fsub <9 x float> undef, undef			; GFX90A-FASTF64-SIZE-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %v9f32 = fsub <9 x float> undef, undef
	; GFX90A-FASTF64-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void			; GFX90A-FASTF64-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
	;			;
	; NOPACKEDF32-SIZE-LABEL: 'fsub_f32'			; NOPACKEDF32-SIZE-LABEL: 'fsub_f32'
	; NOPACKEDF32-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f32 = fsub float undef, undef			; NOPACKEDF32-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f32 = fsub float undef, undef
	; NOPACKEDF32-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2f32 = fsub <2 x float> undef, undef			; NOPACKEDF32-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2f32 = fsub <2 x float> undef, undef
	; NOPACKEDF32-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v3f32 = fsub <3 x float> undef, undef			; NOPACKEDF32-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v3f32 = fsub <3 x float> undef, undef
	; NOPACKEDF32-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4f32 = fsub <4 x float> undef, undef			; NOPACKEDF32-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4f32 = fsub <4 x float> undef, undef
	; NOPACKEDF32-SIZE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v5f32 = fsub <5 x float> undef, undef			; NOPACKEDF32-SIZE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v5f32 = fsub <5 x float> undef, undef
	; NOPACKEDF32-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8f32 = fsub <8 x float> undef, undef			; NOPACKEDF32-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8f32 = fsub <8 x float> undef, undef
	; NOPACKEDF32-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v9f32 = fsub <9 x float> undef, undef			; NOPACKEDF32-SIZE-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %v9f32 = fsub <9 x float> undef, undef
	; NOPACKEDF32-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void			; NOPACKEDF32-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
	;			;
	%f32 = fsub float undef, undef			%f32 = fsub float undef, undef
	%v2f32 = fsub <2 x float> undef, undef			%v2f32 = fsub <2 x float> undef, undef
	%v3f32 = fsub <3 x float> undef, undef			%v3f32 = fsub <3 x float> undef, undef
	%v4f32 = fsub <4 x float> undef, undef			%v4f32 = fsub <4 x float> undef, undef
	%v5f32 = fsub <5 x float> undef, undef			%v5f32 = fsub <5 x float> undef, undef
	%v8f32 = fsub <8 x float> undef, undef			%v8f32 = fsub <8 x float> undef, undef
	▲ Show 20 Lines • Show All 103 Lines • Show Last 20 Lines

llvm/test/Analysis/CostModel/AMDGPU/mul.ll

	; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py			; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
	; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=amdgcn-unknown-amdhsa < %s \| FileCheck -check-prefixes=ALL,SLOW16 %s			; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=amdgcn-unknown-amdhsa < %s \| FileCheck -check-prefixes=ALL,SLOW16 %s
	; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 < %s \| FileCheck -check-prefixes=ALL,FAST16 %s			; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 < %s \| FileCheck -check-prefixes=ALL,FAST16 %s
	; RUN: opt -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output -mtriple=amdgcn-unknown-amdhsa < %s \| FileCheck -check-prefixes=ALL-SIZE,SLOW16-SIZE %s			; RUN: opt -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output -mtriple=amdgcn-unknown-amdhsa < %s \| FileCheck -check-prefixes=ALL-SIZE,SLOW16-SIZE %s
	; RUN: opt -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 < %s \| FileCheck -check-prefixes=ALL-SIZE,FAST16-SIZE %s			; RUN: opt -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 < %s \| FileCheck -check-prefixes=ALL-SIZE,FAST16-SIZE %s
	; END.			; END.

	define amdgpu_kernel void @mul_i32() #0 {			define amdgpu_kernel void @mul_i32() #0 {
	; ALL-LABEL: 'mul_i32'			; ALL-LABEL: 'mul_i32'
	; ALL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %i32 = mul i32 undef, undef			; ALL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %i32 = mul i32 undef, undef
	; ALL-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v2i32 = mul <2 x i32> undef, undef			; ALL-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v2i32 = mul <2 x i32> undef, undef
	; ALL-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v3i32 = mul <3 x i32> undef, undef			; ALL-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v3i32 = mul <3 x i32> undef, undef
	; ALL-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v4i32 = mul <4 x i32> undef, undef			; ALL-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v4i32 = mul <4 x i32> undef, undef
	; ALL-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v5i32 = mul <5 x i32> undef, undef			; ALL-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v5i32 = mul <5 x i32> undef, undef
	; ALL-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v8i32 = mul <8 x i32> undef, undef			; ALL-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v8i32 = mul <8 x i32> undef, undef
	; ALL-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %v9i32 = mul <9 x i32> undef, undef			; ALL-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %v9i32 = mul <9 x i32> undef, undef
	; ALL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void			; ALL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void
	;			;
	; ALL-SIZE-LABEL: 'mul_i32'			; ALL-SIZE-LABEL: 'mul_i32'
	; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %i32 = mul i32 undef, undef			; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %i32 = mul i32 undef, undef
	; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2i32 = mul <2 x i32> undef, undef			; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2i32 = mul <2 x i32> undef, undef
	; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v3i32 = mul <3 x i32> undef, undef			; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v3i32 = mul <3 x i32> undef, undef
	; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4i32 = mul <4 x i32> undef, undef			; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4i32 = mul <4 x i32> undef, undef
	; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v5i32 = mul <5 x i32> undef, undef			; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v5i32 = mul <5 x i32> undef, undef
	; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v8i32 = mul <8 x i32> undef, undef			; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v8i32 = mul <8 x i32> undef, undef
	; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %v9i32 = mul <9 x i32> undef, undef			; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %v9i32 = mul <9 x i32> undef, undef
	; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void			; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
	;			;
	%i32 = mul i32 undef, undef			%i32 = mul i32 undef, undef
	%v2i32 = mul <2 x i32> undef, undef			%v2i32 = mul <2 x i32> undef, undef
	%v3i32 = mul <3 x i32> undef, undef			%v3i32 = mul <3 x i32> undef, undef
	%v4i32 = mul <4 x i32> undef, undef			%v4i32 = mul <4 x i32> undef, undef
	%v5i32 = mul <5 x i32> undef, undef			%v5i32 = mul <5 x i32> undef, undef
	%v8i32 = mul <8 x i32> undef, undef			%v8i32 = mul <8 x i32> undef, undef
	▲ Show 20 Lines • Show All 481 Lines • Show Last 20 Lines

llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.ll

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 2,665 Lines • ▼ Show 20 Lines	entry:
%vec.3 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 6, i32 7>		%vec.3 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 6, i32 7>
store volatile <2 x double> %vec.0, <2 x double> addrspace(1)* undef		store volatile <2 x double> %vec.0, <2 x double> addrspace(1)* undef
store volatile <2 x double> %vec.1, <2 x double> addrspace(1)* undef		store volatile <2 x double> %vec.1, <2 x double> addrspace(1)* undef
store volatile <2 x double> %vec.2, <2 x double> addrspace(1)* undef		store volatile <2 x double> %vec.2, <2 x double> addrspace(1)* undef
store volatile <2 x double> %vec.3, <2 x double> addrspace(1)* undef		store volatile <2 x double> %vec.3, <2 x double> addrspace(1)* undef
ret void		ret void
}		}

		; Dynamic insertelement into <9 x float>; the vector and the index are
		; marked inreg, the inserted value is not.
		; Expected code: the vector is copied s2-s10 -> s0-s8 -> v0-v8 (the value
		; is first saved from v0 into v9), then a single indexed write of v9 is
		; done relative to v0. The GPRIDX checks bracket the write with
		; s_set_gpr_idx_on s11 / s_set_gpr_idx_off; the GFX10 and GFX11 checks
		; load m0 from s11 and use v_movreld_b32 (GFX11 pairs the copies into
		; v_dual_mov_b32).
		define amdgpu_ps <9 x float> @dyn_insertelement_v9f32_s_v_s(<9 x float> inreg %vec, float %val, i32 inreg %idx) {
		; GPRIDX-LABEL: dyn_insertelement_v9f32_s_v_s:
		; GPRIDX: ; %bb.0: ; %entry
		; GPRIDX-NEXT: s_mov_b32 s0, s2
		; GPRIDX-NEXT: s_mov_b32 s1, s3
		; GPRIDX-NEXT: s_mov_b32 s2, s4
		; GPRIDX-NEXT: s_mov_b32 s3, s5
		; GPRIDX-NEXT: s_mov_b32 s4, s6
		; GPRIDX-NEXT: s_mov_b32 s5, s7
		; GPRIDX-NEXT: s_mov_b32 s6, s8
		; GPRIDX-NEXT: s_mov_b32 s7, s9
		; GPRIDX-NEXT: s_mov_b32 s8, s10
		; GPRIDX-NEXT: v_mov_b32_e32 v9, v0
		; GPRIDX-NEXT: v_mov_b32_e32 v0, s0
		; GPRIDX-NEXT: v_mov_b32_e32 v1, s1
		; GPRIDX-NEXT: v_mov_b32_e32 v2, s2
		; GPRIDX-NEXT: v_mov_b32_e32 v3, s3
		; GPRIDX-NEXT: v_mov_b32_e32 v4, s4
		; GPRIDX-NEXT: v_mov_b32_e32 v5, s5
		; GPRIDX-NEXT: v_mov_b32_e32 v6, s6
		; GPRIDX-NEXT: v_mov_b32_e32 v7, s7
		; GPRIDX-NEXT: v_mov_b32_e32 v8, s8
		; GPRIDX-NEXT: s_set_gpr_idx_on s11, gpr_idx(DST)
		; GPRIDX-NEXT: v_mov_b32_e32 v0, v9
		; GPRIDX-NEXT: s_set_gpr_idx_off
		; GPRIDX-NEXT: ; return to shader part epilog
		;
		; GFX10-LABEL: dyn_insertelement_v9f32_s_v_s:
		; GFX10: ; %bb.0: ; %entry
		; GFX10-NEXT: s_mov_b32 s0, s2
		; GFX10-NEXT: s_mov_b32 s1, s3
		; GFX10-NEXT: s_mov_b32 s2, s4
		; GFX10-NEXT: s_mov_b32 s3, s5
		; GFX10-NEXT: s_mov_b32 s4, s6
		; GFX10-NEXT: s_mov_b32 s5, s7
		; GFX10-NEXT: s_mov_b32 s6, s8
		; GFX10-NEXT: s_mov_b32 s7, s9
		; GFX10-NEXT: s_mov_b32 s8, s10
		; GFX10-NEXT: v_mov_b32_e32 v9, v0
		; GFX10-NEXT: v_mov_b32_e32 v0, s0
		; GFX10-NEXT: s_mov_b32 m0, s11
		; GFX10-NEXT: v_mov_b32_e32 v1, s1
		; GFX10-NEXT: v_mov_b32_e32 v2, s2
		; GFX10-NEXT: v_mov_b32_e32 v3, s3
		; GFX10-NEXT: v_mov_b32_e32 v4, s4
		; GFX10-NEXT: v_mov_b32_e32 v5, s5
		; GFX10-NEXT: v_mov_b32_e32 v6, s6
		; GFX10-NEXT: v_mov_b32_e32 v7, s7
		; GFX10-NEXT: v_mov_b32_e32 v8, s8
		; GFX10-NEXT: v_movreld_b32_e32 v0, v9
		; GFX10-NEXT: ; return to shader part epilog
		;
		; GFX11-LABEL: dyn_insertelement_v9f32_s_v_s:
		; GFX11: ; %bb.0: ; %entry
		; GFX11-NEXT: s_mov_b32 s0, s2
		; GFX11-NEXT: s_mov_b32 s1, s3
		; GFX11-NEXT: s_mov_b32 s2, s4
		; GFX11-NEXT: s_mov_b32 s3, s5
		; GFX11-NEXT: s_mov_b32 s4, s6
		; GFX11-NEXT: s_mov_b32 s5, s7
		; GFX11-NEXT: s_mov_b32 s6, s8
		; GFX11-NEXT: s_mov_b32 s7, s9
		; GFX11-NEXT: s_mov_b32 s8, s10
		; GFX11-NEXT: v_dual_mov_b32 v9, v0 :: v_dual_mov_b32 v0, s0
		; GFX11-NEXT: s_mov_b32 m0, s11
		; GFX11-NEXT: v_dual_mov_b32 v1, s1 :: v_dual_mov_b32 v2, s2
		; GFX11-NEXT: v_dual_mov_b32 v3, s3 :: v_dual_mov_b32 v4, s4
		; GFX11-NEXT: v_dual_mov_b32 v5, s5 :: v_dual_mov_b32 v6, s6
		; GFX11-NEXT: v_dual_mov_b32 v7, s7 :: v_dual_mov_b32 v8, s8
		; GFX11-NEXT: v_movreld_b32_e32 v0, v9
		; GFX11-NEXT: ; return to shader part epilog
		entry:
		%insert = insertelement <9 x float> %vec, float %val, i32 %idx
		ret <9 x float> %insert
		}

		; Dynamic insertelement into <9 x float>; only the vector is marked
		; inreg, the value and the index are not.
		; Expected code: no indexed register write is used. The vector elements
		; are copied from s0-s8 into VGPRs, and for every lane 0..8 the index
		; (v1) is compared against the lane number and v_cndmask_b32 selects
		; between the old element and the new value (v0). Lanes 0 and 1 are
		; built in v10 and v9 and moved into v0/v1 at the end, since v0 and v1
		; still hold the value and the index while the compares run.
		define amdgpu_ps <9 x float> @dyn_insertelement_v9f32_s_v_v(<9 x float> inreg %vec, float %val, i32 %idx) {
		; GPRIDX-LABEL: dyn_insertelement_v9f32_s_v_v:
		; GPRIDX: ; %bb.0: ; %entry
		; GPRIDX-NEXT: s_mov_b32 s0, s2
		; GPRIDX-NEXT: s_mov_b32 s2, s4
		; GPRIDX-NEXT: s_mov_b32 s4, s6
		; GPRIDX-NEXT: s_mov_b32 s6, s8
		; GPRIDX-NEXT: s_mov_b32 s8, s10
		; GPRIDX-NEXT: s_mov_b32 s1, s3
		; GPRIDX-NEXT: s_mov_b32 s3, s5
		; GPRIDX-NEXT: s_mov_b32 s5, s7
		; GPRIDX-NEXT: s_mov_b32 s7, s9
		; GPRIDX-NEXT: v_mov_b32_e32 v18, s8
		; GPRIDX-NEXT: v_mov_b32_e32 v10, s0
		; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
		; GPRIDX-NEXT: v_mov_b32_e32 v11, s1
		; GPRIDX-NEXT: v_cndmask_b32_e32 v10, v10, v0, vcc
		; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 1, v1
		; GPRIDX-NEXT: v_mov_b32_e32 v12, s2
		; GPRIDX-NEXT: v_cndmask_b32_e32 v9, v11, v0, vcc
		; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 2, v1
		; GPRIDX-NEXT: v_mov_b32_e32 v13, s3
		; GPRIDX-NEXT: v_cndmask_b32_e32 v2, v12, v0, vcc
		; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 3, v1
		; GPRIDX-NEXT: v_mov_b32_e32 v14, s4
		; GPRIDX-NEXT: v_cndmask_b32_e32 v3, v13, v0, vcc
		; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 4, v1
		; GPRIDX-NEXT: v_mov_b32_e32 v15, s5
		; GPRIDX-NEXT: v_cndmask_b32_e32 v4, v14, v0, vcc
		; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 5, v1
		; GPRIDX-NEXT: v_mov_b32_e32 v16, s6
		; GPRIDX-NEXT: v_cndmask_b32_e32 v5, v15, v0, vcc
		; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 6, v1
		; GPRIDX-NEXT: v_mov_b32_e32 v17, s7
		; GPRIDX-NEXT: v_cndmask_b32_e32 v6, v16, v0, vcc
		; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 7, v1
		; GPRIDX-NEXT: v_cndmask_b32_e32 v7, v17, v0, vcc
		; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 8, v1
		; GPRIDX-NEXT: v_cndmask_b32_e32 v8, v18, v0, vcc
		; GPRIDX-NEXT: v_mov_b32_e32 v0, v10
		; GPRIDX-NEXT: v_mov_b32_e32 v1, v9
		; GPRIDX-NEXT: ; return to shader part epilog
		;
		; GFX10-LABEL: dyn_insertelement_v9f32_s_v_v:
		; GFX10: ; %bb.0: ; %entry
		; GFX10-NEXT: s_mov_b32 s0, s2
		; GFX10-NEXT: s_mov_b32 s2, s4
		; GFX10-NEXT: s_mov_b32 s4, s6
		; GFX10-NEXT: s_mov_b32 s6, s8
		; GFX10-NEXT: s_mov_b32 s8, s10
		; GFX10-NEXT: s_mov_b32 s1, s3
		; GFX10-NEXT: s_mov_b32 s3, s5
		; GFX10-NEXT: s_mov_b32 s5, s7
		; GFX10-NEXT: s_mov_b32 s7, s9
		; GFX10-NEXT: v_mov_b32_e32 v18, s8
		; GFX10-NEXT: v_mov_b32_e32 v10, s0
		; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
		; GFX10-NEXT: v_mov_b32_e32 v11, s1
		; GFX10-NEXT: v_mov_b32_e32 v12, s2
		; GFX10-NEXT: v_mov_b32_e32 v13, s3
		; GFX10-NEXT: v_mov_b32_e32 v14, s4
		; GFX10-NEXT: v_cndmask_b32_e32 v10, v10, v0, vcc_lo
		; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v1
		; GFX10-NEXT: v_mov_b32_e32 v15, s5
		; GFX10-NEXT: v_mov_b32_e32 v16, s6
		; GFX10-NEXT: v_mov_b32_e32 v17, s7
		; GFX10-NEXT: v_cndmask_b32_e32 v9, v11, v0, vcc_lo
		; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v1
		; GFX10-NEXT: v_cndmask_b32_e32 v2, v12, v0, vcc_lo
		; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v1
		; GFX10-NEXT: v_cndmask_b32_e32 v3, v13, v0, vcc_lo
		; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v1
		; GFX10-NEXT: v_cndmask_b32_e32 v4, v14, v0, vcc_lo
		; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v1
		; GFX10-NEXT: v_cndmask_b32_e32 v5, v15, v0, vcc_lo
		; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v1
		; GFX10-NEXT: v_cndmask_b32_e32 v6, v16, v0, vcc_lo
		; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v1
		; GFX10-NEXT: v_cndmask_b32_e32 v7, v17, v0, vcc_lo
		; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 8, v1
		; GFX10-NEXT: v_mov_b32_e32 v1, v9
		; GFX10-NEXT: v_cndmask_b32_e32 v8, v18, v0, vcc_lo
		; GFX10-NEXT: v_mov_b32_e32 v0, v10
		; GFX10-NEXT: ; return to shader part epilog
		;
		; GFX11-LABEL: dyn_insertelement_v9f32_s_v_v:
		; GFX11: ; %bb.0: ; %entry
		; GFX11-NEXT: s_mov_b32 s0, s2
		; GFX11-NEXT: s_mov_b32 s2, s4
		; GFX11-NEXT: s_mov_b32 s4, s6
		; GFX11-NEXT: s_mov_b32 s6, s8
		; GFX11-NEXT: s_mov_b32 s8, s10
		; GFX11-NEXT: s_mov_b32 s1, s3
		; GFX11-NEXT: s_mov_b32 s3, s5
		; GFX11-NEXT: s_mov_b32 s5, s7
		; GFX11-NEXT: s_mov_b32 s7, s9
		; GFX11-NEXT: v_dual_mov_b32 v18, s8 :: v_dual_mov_b32 v17, s7
		; GFX11-NEXT: v_dual_mov_b32 v12, s2 :: v_dual_mov_b32 v11, s1
		; GFX11-NEXT: v_mov_b32_e32 v10, s0
		; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
		; GFX11-NEXT: v_dual_mov_b32 v14, s4 :: v_dual_mov_b32 v13, s3
		; GFX11-NEXT: v_dual_mov_b32 v16, s6 :: v_dual_mov_b32 v15, s5
		; GFX11-NEXT: v_cndmask_b32_e32 v10, v10, v0, vcc_lo
		; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v1
		; GFX11-NEXT: v_cndmask_b32_e32 v9, v11, v0, vcc_lo
		; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v1
		; GFX11-NEXT: v_cndmask_b32_e32 v2, v12, v0, vcc_lo
		; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v1
		; GFX11-NEXT: v_cndmask_b32_e32 v3, v13, v0, vcc_lo
		; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v1
		; GFX11-NEXT: v_cndmask_b32_e32 v4, v14, v0, vcc_lo
		; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v1
		; GFX11-NEXT: v_cndmask_b32_e32 v5, v15, v0, vcc_lo
		; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v1
		; GFX11-NEXT: v_cndmask_b32_e32 v6, v16, v0, vcc_lo
		; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v1
		; GFX11-NEXT: v_cndmask_b32_e32 v7, v17, v0, vcc_lo
		; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 8, v1
		; GFX11-NEXT: v_dual_mov_b32 v1, v9 :: v_dual_cndmask_b32 v8, v18, v0
		; GFX11-NEXT: v_mov_b32_e32 v0, v10
		; GFX11-NEXT: ; return to shader part epilog
		entry:
		%insert = insertelement <9 x float> %vec, float %val, i32 %idx
		ret <9 x float> %insert
		}

		; Dynamic insertelement into <9 x float>; only the index is marked inreg,
		; the vector and the value are not.
		; Expected code: the vector is already in v0-v8 and the value in v9, so
		; the whole operation is one indexed write of v9 relative to v0, using
		; s_set_gpr_idx_on s2 / s_set_gpr_idx_off for the GPRIDX checks and
		; m0 = s2 plus v_movreld_b32 for the shared GFX10PLUS checks.
		define amdgpu_ps <9 x float> @dyn_insertelement_v9f32_v_v_s(<9 x float> %vec, float %val, i32 inreg %idx) {
		; GPRIDX-LABEL: dyn_insertelement_v9f32_v_v_s:
		; GPRIDX: ; %bb.0: ; %entry
		; GPRIDX-NEXT: s_set_gpr_idx_on s2, gpr_idx(DST)
		; GPRIDX-NEXT: v_mov_b32_e32 v0, v9
		; GPRIDX-NEXT: s_set_gpr_idx_off
		; GPRIDX-NEXT: ; return to shader part epilog
		;
		; GFX10PLUS-LABEL: dyn_insertelement_v9f32_v_v_s:
		; GFX10PLUS: ; %bb.0: ; %entry
		; GFX10PLUS-NEXT: s_mov_b32 m0, s2
		; GFX10PLUS-NEXT: v_movreld_b32_e32 v0, v9
		; GFX10PLUS-NEXT: ; return to shader part epilog
		entry:
		%insert = insertelement <9 x float> %vec, float %val, i32 %idx
		ret <9 x float> %insert
		}

		; Dynamic insertelement into <9 x float> with no inreg arguments.
		; Expected code: for each lane 0..8 the index (v10) is compared against
		; the lane number and v_cndmask_b32 replaces that element in place with
		; the value (v9) on a match. The GPRIDX checks use vcc, the shared
		; GFX10PLUS checks use vcc_lo.
		define amdgpu_ps <9 x float> @dyn_insertelement_v9f32_v_v_v(<9 x float> %vec, float %val, i32 %idx) {
		; GPRIDX-LABEL: dyn_insertelement_v9f32_v_v_v:
		; GPRIDX: ; %bb.0: ; %entry
		; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 0, v10
		; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v9, vcc
		; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 1, v10
		; GPRIDX-NEXT: v_cndmask_b32_e32 v1, v1, v9, vcc
		; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 2, v10
		; GPRIDX-NEXT: v_cndmask_b32_e32 v2, v2, v9, vcc
		; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 3, v10
		; GPRIDX-NEXT: v_cndmask_b32_e32 v3, v3, v9, vcc
		; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 4, v10
		; GPRIDX-NEXT: v_cndmask_b32_e32 v4, v4, v9, vcc
		; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 5, v10
		; GPRIDX-NEXT: v_cndmask_b32_e32 v5, v5, v9, vcc
		; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 6, v10
		; GPRIDX-NEXT: v_cndmask_b32_e32 v6, v6, v9, vcc
		; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 7, v10
		; GPRIDX-NEXT: v_cndmask_b32_e32 v7, v7, v9, vcc
		; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 8, v10
		; GPRIDX-NEXT: v_cndmask_b32_e32 v8, v8, v9, vcc
		; GPRIDX-NEXT: ; return to shader part epilog
		;
		; GFX10PLUS-LABEL: dyn_insertelement_v9f32_v_v_v:
		; GFX10PLUS: ; %bb.0: ; %entry
		; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v10
		; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v9, vcc_lo
		; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v10
		; GFX10PLUS-NEXT: v_cndmask_b32_e32 v1, v1, v9, vcc_lo
		; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v10
		; GFX10PLUS-NEXT: v_cndmask_b32_e32 v2, v2, v9, vcc_lo
		; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v10
		; GFX10PLUS-NEXT: v_cndmask_b32_e32 v3, v3, v9, vcc_lo
		; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v10
		; GFX10PLUS-NEXT: v_cndmask_b32_e32 v4, v4, v9, vcc_lo
		; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v10
		; GFX10PLUS-NEXT: v_cndmask_b32_e32 v5, v5, v9, vcc_lo
		; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v10
		; GFX10PLUS-NEXT: v_cndmask_b32_e32 v6, v6, v9, vcc_lo
		; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v10
		; GFX10PLUS-NEXT: v_cndmask_b32_e32 v7, v7, v9, vcc_lo
		; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 8, v10
		; GFX10PLUS-NEXT: v_cndmask_b32_e32 v8, v8, v9, vcc_lo
		; GFX10PLUS-NEXT: ; return to shader part epilog
		entry:
		%insert = insertelement <9 x float> %vec, float %val, i32 %idx
		ret <9 x float> %insert
		}

		; Dynamic insertelement into <10 x float>; the vector and the index are
		; marked inreg, the inserted value is not.
		; Expected code: the vector is copied s2-s11 -> s0-s9 -> v0-v9 (the value
		; is first saved from v0 into v10), then a single indexed write of v10 is
		; done relative to v0. The GPRIDX checks bracket the write with
		; s_set_gpr_idx_on s12 / s_set_gpr_idx_off; the GFX10 and GFX11 checks
		; load m0 from s12 and use v_movreld_b32.
		define amdgpu_ps <10 x float> @dyn_insertelement_v10f32_s_v_s(<10 x float> inreg %vec, float %val, i32 inreg %idx) {
		; GPRIDX-LABEL: dyn_insertelement_v10f32_s_v_s:
		; GPRIDX: ; %bb.0: ; %entry
		; GPRIDX-NEXT: s_mov_b32 s0, s2
		; GPRIDX-NEXT: s_mov_b32 s1, s3
		; GPRIDX-NEXT: s_mov_b32 s2, s4
		; GPRIDX-NEXT: s_mov_b32 s3, s5
		; GPRIDX-NEXT: s_mov_b32 s4, s6
		; GPRIDX-NEXT: s_mov_b32 s5, s7
		; GPRIDX-NEXT: s_mov_b32 s6, s8
		; GPRIDX-NEXT: s_mov_b32 s7, s9
		; GPRIDX-NEXT: s_mov_b32 s8, s10
		; GPRIDX-NEXT: s_mov_b32 s9, s11
		; GPRIDX-NEXT: v_mov_b32_e32 v10, v0
		; GPRIDX-NEXT: v_mov_b32_e32 v0, s0
		; GPRIDX-NEXT: v_mov_b32_e32 v1, s1
		; GPRIDX-NEXT: v_mov_b32_e32 v2, s2
		; GPRIDX-NEXT: v_mov_b32_e32 v3, s3
		; GPRIDX-NEXT: v_mov_b32_e32 v4, s4
		; GPRIDX-NEXT: v_mov_b32_e32 v5, s5
		; GPRIDX-NEXT: v_mov_b32_e32 v6, s6
		; GPRIDX-NEXT: v_mov_b32_e32 v7, s7
		; GPRIDX-NEXT: v_mov_b32_e32 v8, s8
		; GPRIDX-NEXT: v_mov_b32_e32 v9, s9
		; GPRIDX-NEXT: s_set_gpr_idx_on s12, gpr_idx(DST)
		; GPRIDX-NEXT: v_mov_b32_e32 v0, v10
		; GPRIDX-NEXT: s_set_gpr_idx_off
		; GPRIDX-NEXT: ; return to shader part epilog
		;
		; GFX10-LABEL: dyn_insertelement_v10f32_s_v_s:
		; GFX10: ; %bb.0: ; %entry
		; GFX10-NEXT: s_mov_b32 s0, s2
		; GFX10-NEXT: s_mov_b32 s1, s3
		; GFX10-NEXT: s_mov_b32 s2, s4
		; GFX10-NEXT: s_mov_b32 s3, s5
		; GFX10-NEXT: s_mov_b32 s4, s6
		; GFX10-NEXT: s_mov_b32 s5, s7
		; GFX10-NEXT: s_mov_b32 s6, s8
		; GFX10-NEXT: s_mov_b32 s7, s9
		; GFX10-NEXT: s_mov_b32 s8, s10
		; GFX10-NEXT: s_mov_b32 s9, s11
		; GFX10-NEXT: v_mov_b32_e32 v10, v0
		; GFX10-NEXT: v_mov_b32_e32 v0, s0
		; GFX10-NEXT: s_mov_b32 m0, s12
		; GFX10-NEXT: v_mov_b32_e32 v1, s1
		; GFX10-NEXT: v_mov_b32_e32 v2, s2
		; GFX10-NEXT: v_mov_b32_e32 v3, s3
		; GFX10-NEXT: v_mov_b32_e32 v4, s4
		; GFX10-NEXT: v_mov_b32_e32 v5, s5
		; GFX10-NEXT: v_mov_b32_e32 v6, s6
		; GFX10-NEXT: v_mov_b32_e32 v7, s7
		; GFX10-NEXT: v_mov_b32_e32 v8, s8
		; GFX10-NEXT: v_mov_b32_e32 v9, s9
		; GFX10-NEXT: v_movreld_b32_e32 v0, v10
		; GFX10-NEXT: ; return to shader part epilog
		;
		; GFX11-LABEL: dyn_insertelement_v10f32_s_v_s:
		; GFX11: ; %bb.0: ; %entry
		; GFX11-NEXT: s_mov_b32 s0, s2
		; GFX11-NEXT: s_mov_b32 s1, s3
		; GFX11-NEXT: s_mov_b32 s2, s4
		; GFX11-NEXT: s_mov_b32 s3, s5
		; GFX11-NEXT: s_mov_b32 s4, s6
		; GFX11-NEXT: s_mov_b32 s5, s7
		; GFX11-NEXT: s_mov_b32 s6, s8
		; GFX11-NEXT: s_mov_b32 s7, s9
		; GFX11-NEXT: s_mov_b32 s8, s10
		; GFX11-NEXT: s_mov_b32 s9, s11
		; GFX11-NEXT: v_mov_b32_e32 v10, v0
		; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v3, s3
		; GFX11-NEXT: s_mov_b32 m0, s12
		; GFX11-NEXT: v_dual_mov_b32 v1, s1 :: v_dual_mov_b32 v2, s2
		; GFX11-NEXT: v_dual_mov_b32 v5, s5 :: v_dual_mov_b32 v4, s4
		; GFX11-NEXT: v_dual_mov_b32 v7, s7 :: v_dual_mov_b32 v6, s6
		; GFX11-NEXT: v_dual_mov_b32 v9, s9 :: v_dual_mov_b32 v8, s8
		; GFX11-NEXT: v_movreld_b32_e32 v0, v10
		; GFX11-NEXT: ; return to shader part epilog
		entry:
		%insert = insertelement <10 x float> %vec, float %val, i32 %idx
		ret <10 x float> %insert
		}

		; Dynamic insertelement into <10 x float>; only the vector is marked
		; inreg, the value and the index are not.
		; Expected code: no indexed register write is used. The vector elements
		; are copied from s0-s9 into VGPRs, and for every lane 0..9 the index
		; (v1) is compared against the lane number and v_cndmask_b32 selects
		; between the old element and the new value (v0). Lanes 0 and 1 are
		; built in v10 and v11 and moved into v0/v1 at the end. In the GPRIDX
		; checks the lane-8 compare writes s[0:1] instead of vcc so it can be
		; issued alongside the lane-9 compare.
		define amdgpu_ps <10 x float> @dyn_insertelement_v10f32_s_v_v(<10 x float> inreg %vec, float %val, i32 %idx) {
		; GPRIDX-LABEL: dyn_insertelement_v10f32_s_v_v:
		; GPRIDX: ; %bb.0: ; %entry
		; GPRIDX-NEXT: s_mov_b32 s1, s3
		; GPRIDX-NEXT: s_mov_b32 s3, s5
		; GPRIDX-NEXT: s_mov_b32 s5, s7
		; GPRIDX-NEXT: s_mov_b32 s7, s9
		; GPRIDX-NEXT: s_mov_b32 s9, s11
		; GPRIDX-NEXT: s_mov_b32 s0, s2
		; GPRIDX-NEXT: s_mov_b32 s2, s4
		; GPRIDX-NEXT: s_mov_b32 s4, s6
		; GPRIDX-NEXT: s_mov_b32 s6, s8
		; GPRIDX-NEXT: s_mov_b32 s8, s10
		; GPRIDX-NEXT: v_mov_b32_e32 v19, s9
		; GPRIDX-NEXT: v_mov_b32_e32 v10, s0
		; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
		; GPRIDX-NEXT: v_mov_b32_e32 v11, s1
		; GPRIDX-NEXT: v_cndmask_b32_e32 v10, v10, v0, vcc
		; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 1, v1
		; GPRIDX-NEXT: v_mov_b32_e32 v12, s2
		; GPRIDX-NEXT: v_cndmask_b32_e32 v11, v11, v0, vcc
		; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 2, v1
		; GPRIDX-NEXT: v_mov_b32_e32 v13, s3
		; GPRIDX-NEXT: v_cndmask_b32_e32 v2, v12, v0, vcc
		; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 3, v1
		; GPRIDX-NEXT: v_mov_b32_e32 v14, s4
		; GPRIDX-NEXT: v_cndmask_b32_e32 v3, v13, v0, vcc
		; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 4, v1
		; GPRIDX-NEXT: v_mov_b32_e32 v15, s5
		; GPRIDX-NEXT: v_cndmask_b32_e32 v4, v14, v0, vcc
		; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 5, v1
		; GPRIDX-NEXT: v_mov_b32_e32 v16, s6
		; GPRIDX-NEXT: v_cndmask_b32_e32 v5, v15, v0, vcc
		; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 6, v1
		; GPRIDX-NEXT: v_mov_b32_e32 v17, s7
		; GPRIDX-NEXT: v_cndmask_b32_e32 v6, v16, v0, vcc
		; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 7, v1
		; GPRIDX-NEXT: v_mov_b32_e32 v18, s8
		; GPRIDX-NEXT: v_cndmask_b32_e32 v7, v17, v0, vcc
		; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 9, v1
		; GPRIDX-NEXT: v_cmp_eq_u32_e64 s[0:1], 8, v1
		; GPRIDX-NEXT: v_cndmask_b32_e64 v8, v18, v0, s[0:1]
		; GPRIDX-NEXT: v_cndmask_b32_e32 v9, v19, v0, vcc
		; GPRIDX-NEXT: v_mov_b32_e32 v0, v10
		; GPRIDX-NEXT: v_mov_b32_e32 v1, v11
		; GPRIDX-NEXT: ; return to shader part epilog
		;
		; GFX10-LABEL: dyn_insertelement_v10f32_s_v_v:
		; GFX10: ; %bb.0: ; %entry
		; GFX10-NEXT: s_mov_b32 s1, s3
		; GFX10-NEXT: s_mov_b32 s3, s5
		; GFX10-NEXT: s_mov_b32 s5, s7
		; GFX10-NEXT: s_mov_b32 s7, s9
		; GFX10-NEXT: s_mov_b32 s9, s11
		; GFX10-NEXT: s_mov_b32 s0, s2
		; GFX10-NEXT: s_mov_b32 s2, s4
		; GFX10-NEXT: s_mov_b32 s4, s6
		; GFX10-NEXT: s_mov_b32 s6, s8
		; GFX10-NEXT: s_mov_b32 s8, s10
		; GFX10-NEXT: v_mov_b32_e32 v19, s9
		; GFX10-NEXT: v_mov_b32_e32 v10, s0
		; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
		; GFX10-NEXT: v_mov_b32_e32 v11, s1
		; GFX10-NEXT: v_mov_b32_e32 v12, s2
		; GFX10-NEXT: v_mov_b32_e32 v13, s3
		; GFX10-NEXT: v_mov_b32_e32 v14, s4
		; GFX10-NEXT: v_cndmask_b32_e32 v10, v10, v0, vcc_lo
		; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v1
		; GFX10-NEXT: v_mov_b32_e32 v15, s5
		; GFX10-NEXT: v_mov_b32_e32 v16, s6
		; GFX10-NEXT: v_mov_b32_e32 v17, s7
		; GFX10-NEXT: v_mov_b32_e32 v18, s8
		; GFX10-NEXT: v_cndmask_b32_e32 v11, v11, v0, vcc_lo
		; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v1
		; GFX10-NEXT: v_cndmask_b32_e32 v2, v12, v0, vcc_lo
		; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v1
		; GFX10-NEXT: v_cndmask_b32_e32 v3, v13, v0, vcc_lo
		; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v1
		; GFX10-NEXT: v_cndmask_b32_e32 v4, v14, v0, vcc_lo
		; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v1
		; GFX10-NEXT: v_cndmask_b32_e32 v5, v15, v0, vcc_lo
		; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v1
		; GFX10-NEXT: v_cndmask_b32_e32 v6, v16, v0, vcc_lo
		; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v1
		; GFX10-NEXT: v_cndmask_b32_e32 v7, v17, v0, vcc_lo
		; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 8, v1
		; GFX10-NEXT: v_cndmask_b32_e32 v8, v18, v0, vcc_lo
		; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 9, v1
		; GFX10-NEXT: v_mov_b32_e32 v1, v11
		; GFX10-NEXT: v_cndmask_b32_e32 v9, v19, v0, vcc_lo
		; GFX10-NEXT: v_mov_b32_e32 v0, v10
		; GFX10-NEXT: ; return to shader part epilog
		;
		; GFX11-LABEL: dyn_insertelement_v10f32_s_v_v:
		; GFX11: ; %bb.0: ; %entry
		; GFX11-NEXT: s_mov_b32 s1, s3
		; GFX11-NEXT: s_mov_b32 s3, s5
		; GFX11-NEXT: s_mov_b32 s5, s7
		; GFX11-NEXT: s_mov_b32 s7, s9
		; GFX11-NEXT: s_mov_b32 s9, s11
		; GFX11-NEXT: s_mov_b32 s0, s2
		; GFX11-NEXT: s_mov_b32 s2, s4
		; GFX11-NEXT: s_mov_b32 s4, s6
		; GFX11-NEXT: s_mov_b32 s6, s8
		; GFX11-NEXT: s_mov_b32 s8, s10
		; GFX11-NEXT: v_dual_mov_b32 v19, s9 :: v_dual_mov_b32 v18, s8
		; GFX11-NEXT: v_dual_mov_b32 v11, s1 :: v_dual_mov_b32 v10, s0
		; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
		; GFX11-NEXT: v_dual_mov_b32 v13, s3 :: v_dual_mov_b32 v12, s2
		; GFX11-NEXT: v_dual_mov_b32 v15, s5 :: v_dual_mov_b32 v14, s4
		; GFX11-NEXT: v_cndmask_b32_e32 v10, v10, v0, vcc_lo
		; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v1
		; GFX11-NEXT: v_dual_mov_b32 v17, s7 :: v_dual_mov_b32 v16, s6
		; GFX11-NEXT: v_cndmask_b32_e32 v11, v11, v0, vcc_lo
		; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v1
		; GFX11-NEXT: v_cndmask_b32_e32 v2, v12, v0, vcc_lo
		; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v1
		; GFX11-NEXT: v_cndmask_b32_e32 v3, v13, v0, vcc_lo
		; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v1
		; GFX11-NEXT: v_cndmask_b32_e32 v4, v14, v0, vcc_lo
		; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v1
		; GFX11-NEXT: v_cndmask_b32_e32 v5, v15, v0, vcc_lo
		; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v1
		; GFX11-NEXT: v_cndmask_b32_e32 v6, v16, v0, vcc_lo
		; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v1
		; GFX11-NEXT: v_cndmask_b32_e32 v7, v17, v0, vcc_lo
		; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 8, v1
		; GFX11-NEXT: v_cndmask_b32_e32 v8, v18, v0, vcc_lo
		; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 9, v1
		; GFX11-NEXT: v_mov_b32_e32 v1, v11
		; GFX11-NEXT: v_dual_cndmask_b32 v9, v19, v0 :: v_dual_mov_b32 v0, v10
		; GFX11-NEXT: ; return to shader part epilog
		entry:
		%insert = insertelement <10 x float> %vec, float %val, i32 %idx
		ret <10 x float> %insert
		}

		; Dynamic insertelement into <10 x float>; only the index is marked
		; inreg, the vector and the value are not.
		; Expected code: the vector is already in v0-v9 and the value in v10, so
		; the whole operation is one indexed write of v10 relative to v0, using
		; s_set_gpr_idx_on s2 / s_set_gpr_idx_off for the GPRIDX checks and
		; m0 = s2 plus v_movreld_b32 for the shared GFX10PLUS checks.
		define amdgpu_ps <10 x float> @dyn_insertelement_v10f32_v_v_s(<10 x float> %vec, float %val, i32 inreg %idx) {
		; GPRIDX-LABEL: dyn_insertelement_v10f32_v_v_s:
		; GPRIDX: ; %bb.0: ; %entry
		; GPRIDX-NEXT: s_set_gpr_idx_on s2, gpr_idx(DST)
		; GPRIDX-NEXT: v_mov_b32_e32 v0, v10
		; GPRIDX-NEXT: s_set_gpr_idx_off
		; GPRIDX-NEXT: ; return to shader part epilog
		;
		; GFX10PLUS-LABEL: dyn_insertelement_v10f32_v_v_s:
		; GFX10PLUS: ; %bb.0: ; %entry
		; GFX10PLUS-NEXT: s_mov_b32 m0, s2
		; GFX10PLUS-NEXT: v_movreld_b32_e32 v0, v10
		; GFX10PLUS-NEXT: ; return to shader part epilog
		entry:
		%insert = insertelement <10 x float> %vec, float %val, i32 %idx
		ret <10 x float> %insert
		}

		; Dynamic insertelement into <10 x float> with no inreg arguments.
		; Expected code: for each lane 0..9 the index (v11) is compared against
		; the lane number and v_cndmask_b32 replaces that element in place with
		; the value (v10) on a match. The GPRIDX checks use vcc, the shared
		; GFX10PLUS checks use vcc_lo.
		define amdgpu_ps <10 x float> @dyn_insertelement_v10f32_v_v_v(<10 x float> %vec, float %val, i32 %idx) {
		; GPRIDX-LABEL: dyn_insertelement_v10f32_v_v_v:
		; GPRIDX: ; %bb.0: ; %entry
		; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 0, v11
		; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v10, vcc
		; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 1, v11
		; GPRIDX-NEXT: v_cndmask_b32_e32 v1, v1, v10, vcc
		; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 2, v11
		; GPRIDX-NEXT: v_cndmask_b32_e32 v2, v2, v10, vcc
		; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 3, v11
		; GPRIDX-NEXT: v_cndmask_b32_e32 v3, v3, v10, vcc
		; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 4, v11
		; GPRIDX-NEXT: v_cndmask_b32_e32 v4, v4, v10, vcc
		; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 5, v11
		; GPRIDX-NEXT: v_cndmask_b32_e32 v5, v5, v10, vcc
		; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 6, v11
		; GPRIDX-NEXT: v_cndmask_b32_e32 v6, v6, v10, vcc
		; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 7, v11
		; GPRIDX-NEXT: v_cndmask_b32_e32 v7, v7, v10, vcc
		; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 8, v11
		; GPRIDX-NEXT: v_cndmask_b32_e32 v8, v8, v10, vcc
		; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 9, v11
		; GPRIDX-NEXT: v_cndmask_b32_e32 v9, v9, v10, vcc
		; GPRIDX-NEXT: ; return to shader part epilog
		;
		; GFX10PLUS-LABEL: dyn_insertelement_v10f32_v_v_v:
		; GFX10PLUS: ; %bb.0: ; %entry
		; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v11
		; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v10, vcc_lo
		; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v11
		; GFX10PLUS-NEXT: v_cndmask_b32_e32 v1, v1, v10, vcc_lo
		; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v11
		; GFX10PLUS-NEXT: v_cndmask_b32_e32 v2, v2, v10, vcc_lo
		; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v11
		; GFX10PLUS-NEXT: v_cndmask_b32_e32 v3, v3, v10, vcc_lo
		; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v11
		; GFX10PLUS-NEXT: v_cndmask_b32_e32 v4, v4, v10, vcc_lo
		; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v11
		; GFX10PLUS-NEXT: v_cndmask_b32_e32 v5, v5, v10, vcc_lo
		; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v11
		; GFX10PLUS-NEXT: v_cndmask_b32_e32 v6, v6, v10, vcc_lo
		; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v11
		; GFX10PLUS-NEXT: v_cndmask_b32_e32 v7, v7, v10, vcc_lo
		; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 8, v11
		; GFX10PLUS-NEXT: v_cndmask_b32_e32 v8, v8, v10, vcc_lo
		; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 9, v11
		; GFX10PLUS-NEXT: v_cndmask_b32_e32 v9, v9, v10, vcc_lo
		; GFX10PLUS-NEXT: ; return to shader part epilog
		entry:
		%insert = insertelement <10 x float> %vec, float %val, i32 %idx
		ret <10 x float> %insert
		}

		; Dynamic insertelement into <11 x float>; the vector and the index are
		; marked inreg, the inserted value is not.
		; Expected code: the vector is copied s2-s12 -> s0-s10 -> v0-v10 (the
		; value is first saved from v0 into v11), then a single indexed write of
		; v11 is done relative to v0. The GPRIDX checks bracket the write with
		; s_set_gpr_idx_on s13 / s_set_gpr_idx_off; the GFX10 and GFX11 checks
		; load m0 from s13 and use v_movreld_b32.
		define amdgpu_ps <11 x float> @dyn_insertelement_v11f32_s_v_s(<11 x float> inreg %vec, float %val, i32 inreg %idx) {
		; GPRIDX-LABEL: dyn_insertelement_v11f32_s_v_s:
		; GPRIDX: ; %bb.0: ; %entry
		; GPRIDX-NEXT: s_mov_b32 s0, s2
		; GPRIDX-NEXT: s_mov_b32 s1, s3
		; GPRIDX-NEXT: s_mov_b32 s2, s4
		; GPRIDX-NEXT: s_mov_b32 s3, s5
		; GPRIDX-NEXT: s_mov_b32 s4, s6
		; GPRIDX-NEXT: s_mov_b32 s5, s7
		; GPRIDX-NEXT: s_mov_b32 s6, s8
		; GPRIDX-NEXT: s_mov_b32 s7, s9
		; GPRIDX-NEXT: s_mov_b32 s8, s10
		; GPRIDX-NEXT: s_mov_b32 s9, s11
		; GPRIDX-NEXT: s_mov_b32 s10, s12
		; GPRIDX-NEXT: v_mov_b32_e32 v11, v0
		; GPRIDX-NEXT: v_mov_b32_e32 v0, s0
		; GPRIDX-NEXT: v_mov_b32_e32 v1, s1
		; GPRIDX-NEXT: v_mov_b32_e32 v2, s2
		; GPRIDX-NEXT: v_mov_b32_e32 v3, s3
		; GPRIDX-NEXT: v_mov_b32_e32 v4, s4
		; GPRIDX-NEXT: v_mov_b32_e32 v5, s5
		; GPRIDX-NEXT: v_mov_b32_e32 v6, s6
		; GPRIDX-NEXT: v_mov_b32_e32 v7, s7
		; GPRIDX-NEXT: v_mov_b32_e32 v8, s8
		; GPRIDX-NEXT: v_mov_b32_e32 v9, s9
		; GPRIDX-NEXT: v_mov_b32_e32 v10, s10
		; GPRIDX-NEXT: s_set_gpr_idx_on s13, gpr_idx(DST)
		; GPRIDX-NEXT: v_mov_b32_e32 v0, v11
		; GPRIDX-NEXT: s_set_gpr_idx_off
		; GPRIDX-NEXT: ; return to shader part epilog
		;
		; GFX10-LABEL: dyn_insertelement_v11f32_s_v_s:
		; GFX10: ; %bb.0: ; %entry
		; GFX10-NEXT: s_mov_b32 s0, s2
		; GFX10-NEXT: s_mov_b32 s1, s3
		; GFX10-NEXT: s_mov_b32 s2, s4
		; GFX10-NEXT: s_mov_b32 s3, s5
		; GFX10-NEXT: s_mov_b32 s4, s6
		; GFX10-NEXT: s_mov_b32 s5, s7
		; GFX10-NEXT: s_mov_b32 s6, s8
		; GFX10-NEXT: s_mov_b32 s7, s9
		; GFX10-NEXT: s_mov_b32 s8, s10
		; GFX10-NEXT: s_mov_b32 s9, s11
		; GFX10-NEXT: s_mov_b32 s10, s12
		; GFX10-NEXT: v_mov_b32_e32 v11, v0
		; GFX10-NEXT: v_mov_b32_e32 v0, s0
		; GFX10-NEXT: s_mov_b32 m0, s13
		; GFX10-NEXT: v_mov_b32_e32 v1, s1
		; GFX10-NEXT: v_mov_b32_e32 v2, s2
		; GFX10-NEXT: v_mov_b32_e32 v3, s3
		; GFX10-NEXT: v_mov_b32_e32 v4, s4
		; GFX10-NEXT: v_mov_b32_e32 v5, s5
		; GFX10-NEXT: v_mov_b32_e32 v6, s6
		; GFX10-NEXT: v_mov_b32_e32 v7, s7
		; GFX10-NEXT: v_mov_b32_e32 v8, s8
		; GFX10-NEXT: v_mov_b32_e32 v9, s9
		; GFX10-NEXT: v_mov_b32_e32 v10, s10
		; GFX10-NEXT: v_movreld_b32_e32 v0, v11
		; GFX10-NEXT: ; return to shader part epilog
		;
		; GFX11-LABEL: dyn_insertelement_v11f32_s_v_s:
		; GFX11: ; %bb.0: ; %entry
		; GFX11-NEXT: s_mov_b32 s0, s2
		; GFX11-NEXT: s_mov_b32 s1, s3
		; GFX11-NEXT: s_mov_b32 s2, s4
		; GFX11-NEXT: s_mov_b32 s3, s5
		; GFX11-NEXT: s_mov_b32 s4, s6
		; GFX11-NEXT: s_mov_b32 s5, s7
		; GFX11-NEXT: s_mov_b32 s6, s8
		; GFX11-NEXT: s_mov_b32 s7, s9
		; GFX11-NEXT: s_mov_b32 s8, s10
		; GFX11-NEXT: s_mov_b32 s9, s11
		; GFX11-NEXT: s_mov_b32 s10, s12
		; GFX11-NEXT: v_dual_mov_b32 v11, v0 :: v_dual_mov_b32 v0, s0
		; GFX11-NEXT: s_mov_b32 m0, s13
		; GFX11-NEXT: v_dual_mov_b32 v1, s1 :: v_dual_mov_b32 v2, s2
		; GFX11-NEXT: v_dual_mov_b32 v3, s3 :: v_dual_mov_b32 v4, s4
		; GFX11-NEXT: v_dual_mov_b32 v5, s5 :: v_dual_mov_b32 v6, s6
		; GFX11-NEXT: v_dual_mov_b32 v7, s7 :: v_dual_mov_b32 v8, s8
		; GFX11-NEXT: v_dual_mov_b32 v9, s9 :: v_dual_mov_b32 v10, s10
		; GFX11-NEXT: v_movreld_b32_e32 v0, v11
		; GFX11-NEXT: ; return to shader part epilog
		entry:
		%insert = insertelement <11 x float> %vec, float %val, i32 %idx
		ret <11 x float> %insert
		}

		; Dynamic insertelement into <11 x float>; only the vector is marked
		; inreg, the value and the index are not.
		; Expected code: no indexed register write is used. The vector elements
		; are copied from s0-s10 into VGPRs, and for every lane 0..10 the index
		; (v1) is compared against the lane number and v_cndmask_b32 selects
		; between the old element and the new value (v0). Lanes 0 and 1 are
		; built in v12 and v11 and moved into v0/v1 at the end. In the GPRIDX
		; checks the compares for lanes 7, 9 and 10 write SGPR pairs
		; (s[4:5], s[0:1], s[2:3]) instead of vcc so the last four compares can
		; be issued back to back before their selects.
		define amdgpu_ps <11 x float> @dyn_insertelement_v11f32_s_v_v(<11 x float> inreg %vec, float %val, i32 %idx) {
		; GPRIDX-LABEL: dyn_insertelement_v11f32_s_v_v:
		; GPRIDX: ; %bb.0: ; %entry
		; GPRIDX-NEXT: s_mov_b32 s0, s2
		; GPRIDX-NEXT: s_mov_b32 s2, s4
		; GPRIDX-NEXT: s_mov_b32 s4, s6
		; GPRIDX-NEXT: s_mov_b32 s6, s8
		; GPRIDX-NEXT: s_mov_b32 s8, s10
		; GPRIDX-NEXT: s_mov_b32 s10, s12
		; GPRIDX-NEXT: s_mov_b32 s1, s3
		; GPRIDX-NEXT: s_mov_b32 s3, s5
		; GPRIDX-NEXT: s_mov_b32 s5, s7
		; GPRIDX-NEXT: s_mov_b32 s7, s9
		; GPRIDX-NEXT: s_mov_b32 s9, s11
		; GPRIDX-NEXT: v_mov_b32_e32 v22, s10
		; GPRIDX-NEXT: v_mov_b32_e32 v12, s0
		; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
		; GPRIDX-NEXT: v_mov_b32_e32 v13, s1
		; GPRIDX-NEXT: v_cndmask_b32_e32 v12, v12, v0, vcc
		; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 1, v1
		; GPRIDX-NEXT: v_mov_b32_e32 v14, s2
		; GPRIDX-NEXT: v_cndmask_b32_e32 v11, v13, v0, vcc
		; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 2, v1
		; GPRIDX-NEXT: v_mov_b32_e32 v15, s3
		; GPRIDX-NEXT: v_cndmask_b32_e32 v2, v14, v0, vcc
		; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 3, v1
		; GPRIDX-NEXT: v_mov_b32_e32 v16, s4
		; GPRIDX-NEXT: v_cndmask_b32_e32 v3, v15, v0, vcc
		; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 4, v1
		; GPRIDX-NEXT: v_mov_b32_e32 v17, s5
		; GPRIDX-NEXT: v_cndmask_b32_e32 v4, v16, v0, vcc
		; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 5, v1
		; GPRIDX-NEXT: v_mov_b32_e32 v18, s6
		; GPRIDX-NEXT: v_cndmask_b32_e32 v5, v17, v0, vcc
		; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 6, v1
		; GPRIDX-NEXT: v_mov_b32_e32 v21, s9
		; GPRIDX-NEXT: v_mov_b32_e32 v20, s8
		; GPRIDX-NEXT: v_mov_b32_e32 v19, s7
		; GPRIDX-NEXT: v_cndmask_b32_e32 v6, v18, v0, vcc
		; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 8, v1
		; GPRIDX-NEXT: v_cmp_eq_u32_e64 s[0:1], 9, v1
		; GPRIDX-NEXT: v_cmp_eq_u32_e64 s[2:3], 10, v1
		; GPRIDX-NEXT: v_cmp_eq_u32_e64 s[4:5], 7, v1
		; GPRIDX-NEXT: v_cndmask_b32_e64 v7, v19, v0, s[4:5]
		; GPRIDX-NEXT: v_cndmask_b32_e32 v8, v20, v0, vcc
		; GPRIDX-NEXT: v_cndmask_b32_e64 v9, v21, v0, s[0:1]
		; GPRIDX-NEXT: v_cndmask_b32_e64 v10, v22, v0, s[2:3]
		; GPRIDX-NEXT: v_mov_b32_e32 v0, v12
		; GPRIDX-NEXT: v_mov_b32_e32 v1, v11
		; GPRIDX-NEXT: ; return to shader part epilog
		;
		; GFX10-LABEL: dyn_insertelement_v11f32_s_v_v:
		; GFX10: ; %bb.0: ; %entry
		; GFX10-NEXT: s_mov_b32 s0, s2
		; GFX10-NEXT: s_mov_b32 s2, s4
		; GFX10-NEXT: s_mov_b32 s4, s6
		; GFX10-NEXT: s_mov_b32 s6, s8
		; GFX10-NEXT: s_mov_b32 s8, s10
		; GFX10-NEXT: s_mov_b32 s10, s12
		; GFX10-NEXT: s_mov_b32 s1, s3
		; GFX10-NEXT: s_mov_b32 s3, s5
		; GFX10-NEXT: s_mov_b32 s5, s7
		; GFX10-NEXT: s_mov_b32 s7, s9
		; GFX10-NEXT: s_mov_b32 s9, s11
		; GFX10-NEXT: v_mov_b32_e32 v22, s10
		; GFX10-NEXT: v_mov_b32_e32 v12, s0
		; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
		; GFX10-NEXT: v_mov_b32_e32 v13, s1
		; GFX10-NEXT: v_mov_b32_e32 v14, s2
		; GFX10-NEXT: v_mov_b32_e32 v15, s3
		; GFX10-NEXT: v_mov_b32_e32 v16, s4
		; GFX10-NEXT: v_cndmask_b32_e32 v12, v12, v0, vcc_lo
		; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v1
		; GFX10-NEXT: v_mov_b32_e32 v17, s5
		; GFX10-NEXT: v_mov_b32_e32 v18, s6
		; GFX10-NEXT: v_mov_b32_e32 v19, s7
		; GFX10-NEXT: v_mov_b32_e32 v20, s8
		; GFX10-NEXT: v_cndmask_b32_e32 v11, v13, v0, vcc_lo
		; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v1
		; GFX10-NEXT: v_mov_b32_e32 v21, s9
		; GFX10-NEXT: v_cndmask_b32_e32 v2, v14, v0, vcc_lo
		; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v1
		; GFX10-NEXT: v_cndmask_b32_e32 v3, v15, v0, vcc_lo
		; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v1
		; GFX10-NEXT: v_cndmask_b32_e32 v4, v16, v0, vcc_lo
		; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v1
		; GFX10-NEXT: v_cndmask_b32_e32 v5, v17, v0, vcc_lo
		; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v1
		; GFX10-NEXT: v_cndmask_b32_e32 v6, v18, v0, vcc_lo
		; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v1
		; GFX10-NEXT: v_cndmask_b32_e32 v7, v19, v0, vcc_lo
		; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 8, v1
		; GFX10-NEXT: v_cndmask_b32_e32 v8, v20, v0, vcc_lo
		; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 9, v1
		; GFX10-NEXT: v_cndmask_b32_e32 v9, v21, v0, vcc_lo
		; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 10, v1
		; GFX10-NEXT: v_mov_b32_e32 v1, v11
		; GFX10-NEXT: v_cndmask_b32_e32 v10, v22, v0, vcc_lo
		; GFX10-NEXT: v_mov_b32_e32 v0, v12
		; GFX10-NEXT: ; return to shader part epilog
		;
		; GFX11-LABEL: dyn_insertelement_v11f32_s_v_v:
		; GFX11: ; %bb.0: ; %entry
		; GFX11-NEXT: s_mov_b32 s0, s2
		; GFX11-NEXT: s_mov_b32 s2, s4
		; GFX11-NEXT: s_mov_b32 s4, s6
		; GFX11-NEXT: s_mov_b32 s6, s8
		; GFX11-NEXT: s_mov_b32 s8, s10
		; GFX11-NEXT: s_mov_b32 s10, s12
		; GFX11-NEXT: s_mov_b32 s1, s3
		; GFX11-NEXT: s_mov_b32 s3, s5
		; GFX11-NEXT: s_mov_b32 s5, s7
		; GFX11-NEXT: s_mov_b32 s7, s9
		; GFX11-NEXT: s_mov_b32 s9, s11
		; GFX11-NEXT: v_dual_mov_b32 v22, s10 :: v_dual_mov_b32 v21, s9
		; GFX11-NEXT: v_dual_mov_b32 v14, s2 :: v_dual_mov_b32 v13, s1
		; GFX11-NEXT: v_mov_b32_e32 v12, s0
		; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
		; GFX11-NEXT: v_dual_mov_b32 v16, s4 :: v_dual_mov_b32 v15, s3
		; GFX11-NEXT: v_dual_mov_b32 v18, s6 :: v_dual_mov_b32 v17, s5
		; GFX11-NEXT: v_cndmask_b32_e32 v12, v12, v0, vcc_lo
		; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v1
		; GFX11-NEXT: v_dual_mov_b32 v20, s8 :: v_dual_mov_b32 v19, s7
		; GFX11-NEXT: v_cndmask_b32_e32 v11, v13, v0, vcc_lo
		; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v1
		; GFX11-NEXT: v_cndmask_b32_e32 v2, v14, v0, vcc_lo
		; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v1
		; GFX11-NEXT: v_cndmask_b32_e32 v3, v15, v0, vcc_lo
		; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v1
		; GFX11-NEXT: v_cndmask_b32_e32 v4, v16, v0, vcc_lo
		; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v1
		; GFX11-NEXT: v_cndmask_b32_e32 v5, v17, v0, vcc_lo
		; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v1
		; GFX11-NEXT: v_cndmask_b32_e32 v6, v18, v0, vcc_lo
		; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v1
		; GFX11-NEXT: v_cndmask_b32_e32 v7, v19, v0, vcc_lo
		; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 8, v1
		; GFX11-NEXT: v_cndmask_b32_e32 v8, v20, v0, vcc_lo
		; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 9, v1
		; GFX11-NEXT: v_cndmask_b32_e32 v9, v21, v0, vcc_lo
		; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 10, v1
		; GFX11-NEXT: v_dual_mov_b32 v1, v11 :: v_dual_cndmask_b32 v10, v22, v0
		; GFX11-NEXT: v_mov_b32_e32 v0, v12
		; GFX11-NEXT: ; return to shader part epilog
		entry:
		%insert = insertelement <11 x float> %vec, float %val, i32 %idx
		ret <11 x float> %insert
		}

		define amdgpu_ps <11 x float> @dyn_insertelement_v11f32_v_v_s(<11 x float> %vec, float %val, i32 inreg %idx) {
		; GPRIDX-LABEL: dyn_insertelement_v11f32_v_v_s:
		; GPRIDX: ; %bb.0: ; %entry
		; GPRIDX-NEXT: s_set_gpr_idx_on s2, gpr_idx(DST)
		; GPRIDX-NEXT: v_mov_b32_e32 v0, v11
		; GPRIDX-NEXT: s_set_gpr_idx_off
		; GPRIDX-NEXT: ; return to shader part epilog
		;
		; GFX10PLUS-LABEL: dyn_insertelement_v11f32_v_v_s:
		; GFX10PLUS: ; %bb.0: ; %entry
		; GFX10PLUS-NEXT: s_mov_b32 m0, s2
		; GFX10PLUS-NEXT: v_movreld_b32_e32 v0, v11
		; GFX10PLUS-NEXT: ; return to shader part epilog
		entry:
		%insert = insertelement <11 x float> %vec, float %val, i32 %idx
		ret <11 x float> %insert
		}

		define amdgpu_ps <11 x float> @dyn_insertelement_v11f32_v_v_v(<11 x float> %vec, float %val, i32 %idx) {
		; GPRIDX-LABEL: dyn_insertelement_v11f32_v_v_v:
		; GPRIDX: ; %bb.0: ; %entry
		; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 0, v12
		; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v11, vcc
		; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 1, v12
		; GPRIDX-NEXT: v_cndmask_b32_e32 v1, v1, v11, vcc
		; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 2, v12
		; GPRIDX-NEXT: v_cndmask_b32_e32 v2, v2, v11, vcc
		; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 3, v12
		; GPRIDX-NEXT: v_cndmask_b32_e32 v3, v3, v11, vcc
		; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 4, v12
		; GPRIDX-NEXT: v_cndmask_b32_e32 v4, v4, v11, vcc
		; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 5, v12
		; GPRIDX-NEXT: v_cndmask_b32_e32 v5, v5, v11, vcc
		; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 6, v12
		; GPRIDX-NEXT: v_cndmask_b32_e32 v6, v6, v11, vcc
		; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 7, v12
		; GPRIDX-NEXT: v_cndmask_b32_e32 v7, v7, v11, vcc
		; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 8, v12
		; GPRIDX-NEXT: v_cndmask_b32_e32 v8, v8, v11, vcc
		; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 9, v12
		; GPRIDX-NEXT: v_cndmask_b32_e32 v9, v9, v11, vcc
		; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 10, v12
		; GPRIDX-NEXT: v_cndmask_b32_e32 v10, v10, v11, vcc
		; GPRIDX-NEXT: ; return to shader part epilog
		;
		; GFX10PLUS-LABEL: dyn_insertelement_v11f32_v_v_v:
		; GFX10PLUS: ; %bb.0: ; %entry
		; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v12
		; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v11, vcc_lo
		; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v12
		; GFX10PLUS-NEXT: v_cndmask_b32_e32 v1, v1, v11, vcc_lo
		; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v12
		; GFX10PLUS-NEXT: v_cndmask_b32_e32 v2, v2, v11, vcc_lo
		; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v12
		; GFX10PLUS-NEXT: v_cndmask_b32_e32 v3, v3, v11, vcc_lo
		; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v12
		; GFX10PLUS-NEXT: v_cndmask_b32_e32 v4, v4, v11, vcc_lo
		; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v12
		; GFX10PLUS-NEXT: v_cndmask_b32_e32 v5, v5, v11, vcc_lo
		; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v12
		; GFX10PLUS-NEXT: v_cndmask_b32_e32 v6, v6, v11, vcc_lo
		; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v12
		; GFX10PLUS-NEXT: v_cndmask_b32_e32 v7, v7, v11, vcc_lo
		; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 8, v12
		; GFX10PLUS-NEXT: v_cndmask_b32_e32 v8, v8, v11, vcc_lo
		; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 9, v12
		; GFX10PLUS-NEXT: v_cndmask_b32_e32 v9, v9, v11, vcc_lo
		; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 10, v12
		; GFX10PLUS-NEXT: v_cndmask_b32_e32 v10, v10, v11, vcc_lo
		; GFX10PLUS-NEXT: ; return to shader part epilog
		entry:
		%insert = insertelement <11 x float> %vec, float %val, i32 %idx
		ret <11 x float> %insert
		}

		define amdgpu_ps <12 x float> @dyn_insertelement_v12f32_s_v_s(<12 x float> inreg %vec, float %val, i32 inreg %idx) {
		; GPRIDX-LABEL: dyn_insertelement_v12f32_s_v_s:
		; GPRIDX: ; %bb.0: ; %entry
		; GPRIDX-NEXT: s_mov_b32 s0, s2
		; GPRIDX-NEXT: s_mov_b32 s1, s3
		; GPRIDX-NEXT: s_mov_b32 s2, s4
		; GPRIDX-NEXT: s_mov_b32 s3, s5
		; GPRIDX-NEXT: s_mov_b32 s4, s6
		; GPRIDX-NEXT: s_mov_b32 s5, s7
		; GPRIDX-NEXT: s_mov_b32 s6, s8
		; GPRIDX-NEXT: s_mov_b32 s7, s9
		; GPRIDX-NEXT: s_mov_b32 s8, s10
		; GPRIDX-NEXT: s_mov_b32 s9, s11
		; GPRIDX-NEXT: s_mov_b32 s10, s12
		; GPRIDX-NEXT: s_mov_b32 s11, s13
		; GPRIDX-NEXT: v_mov_b32_e32 v12, v0
		; GPRIDX-NEXT: v_mov_b32_e32 v0, s0
		; GPRIDX-NEXT: v_mov_b32_e32 v1, s1
		; GPRIDX-NEXT: v_mov_b32_e32 v2, s2
		; GPRIDX-NEXT: v_mov_b32_e32 v3, s3
		; GPRIDX-NEXT: v_mov_b32_e32 v4, s4
		; GPRIDX-NEXT: v_mov_b32_e32 v5, s5
		; GPRIDX-NEXT: v_mov_b32_e32 v6, s6
		; GPRIDX-NEXT: v_mov_b32_e32 v7, s7
		; GPRIDX-NEXT: v_mov_b32_e32 v8, s8
		; GPRIDX-NEXT: v_mov_b32_e32 v9, s9
		; GPRIDX-NEXT: v_mov_b32_e32 v10, s10
		; GPRIDX-NEXT: v_mov_b32_e32 v11, s11
		; GPRIDX-NEXT: s_set_gpr_idx_on s14, gpr_idx(DST)
		; GPRIDX-NEXT: v_mov_b32_e32 v0, v12
		; GPRIDX-NEXT: s_set_gpr_idx_off
		; GPRIDX-NEXT: ; return to shader part epilog
		;
		; GFX10-LABEL: dyn_insertelement_v12f32_s_v_s:
		; GFX10: ; %bb.0: ; %entry
		; GFX10-NEXT: s_mov_b32 s0, s2
		; GFX10-NEXT: s_mov_b32 s1, s3
		; GFX10-NEXT: s_mov_b32 s2, s4
		; GFX10-NEXT: s_mov_b32 s3, s5
		; GFX10-NEXT: s_mov_b32 s4, s6
		; GFX10-NEXT: s_mov_b32 s5, s7
		; GFX10-NEXT: s_mov_b32 s6, s8
		; GFX10-NEXT: s_mov_b32 s7, s9
		; GFX10-NEXT: s_mov_b32 s8, s10
		; GFX10-NEXT: s_mov_b32 s9, s11
		; GFX10-NEXT: s_mov_b32 s10, s12
		; GFX10-NEXT: s_mov_b32 s11, s13
		; GFX10-NEXT: v_mov_b32_e32 v12, v0
		; GFX10-NEXT: v_mov_b32_e32 v0, s0
		; GFX10-NEXT: s_mov_b32 m0, s14
		; GFX10-NEXT: v_mov_b32_e32 v1, s1
		; GFX10-NEXT: v_mov_b32_e32 v2, s2
		; GFX10-NEXT: v_mov_b32_e32 v3, s3
		; GFX10-NEXT: v_mov_b32_e32 v4, s4
		; GFX10-NEXT: v_mov_b32_e32 v5, s5
		; GFX10-NEXT: v_mov_b32_e32 v6, s6
		; GFX10-NEXT: v_mov_b32_e32 v7, s7
		; GFX10-NEXT: v_mov_b32_e32 v8, s8
		; GFX10-NEXT: v_mov_b32_e32 v9, s9
		; GFX10-NEXT: v_mov_b32_e32 v10, s10
		; GFX10-NEXT: v_mov_b32_e32 v11, s11
		; GFX10-NEXT: v_movreld_b32_e32 v0, v12
		; GFX10-NEXT: ; return to shader part epilog
		;
		; GFX11-LABEL: dyn_insertelement_v12f32_s_v_s:
		; GFX11: ; %bb.0: ; %entry
		; GFX11-NEXT: s_mov_b32 s0, s2
		; GFX11-NEXT: s_mov_b32 s1, s3
		; GFX11-NEXT: s_mov_b32 s2, s4
		; GFX11-NEXT: s_mov_b32 s3, s5
		; GFX11-NEXT: s_mov_b32 s4, s6
		; GFX11-NEXT: s_mov_b32 s5, s7
		; GFX11-NEXT: s_mov_b32 s6, s8
		; GFX11-NEXT: s_mov_b32 s7, s9
		; GFX11-NEXT: s_mov_b32 s8, s10
		; GFX11-NEXT: s_mov_b32 s9, s11
		; GFX11-NEXT: s_mov_b32 s10, s12
		; GFX11-NEXT: s_mov_b32 s11, s13
		; GFX11-NEXT: v_mov_b32_e32 v12, v0
		; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v3, s3
		; GFX11-NEXT: s_mov_b32 m0, s14
		; GFX11-NEXT: v_dual_mov_b32 v1, s1 :: v_dual_mov_b32 v2, s2
		; GFX11-NEXT: v_dual_mov_b32 v5, s5 :: v_dual_mov_b32 v4, s4
		; GFX11-NEXT: v_dual_mov_b32 v7, s7 :: v_dual_mov_b32 v6, s6
		; GFX11-NEXT: v_dual_mov_b32 v9, s9 :: v_dual_mov_b32 v8, s8
		; GFX11-NEXT: v_dual_mov_b32 v11, s11 :: v_dual_mov_b32 v10, s10
		; GFX11-NEXT: v_movreld_b32_e32 v0, v12
		; GFX11-NEXT: ; return to shader part epilog
		entry:
		%insert = insertelement <12 x float> %vec, float %val, i32 %idx
		ret <12 x float> %insert
		}

		define amdgpu_ps <12 x float> @dyn_insertelement_v12f32_s_v_v(<12 x float> inreg %vec, float %val, i32 %idx) {
		; GPRIDX-LABEL: dyn_insertelement_v12f32_s_v_v:
		; GPRIDX: ; %bb.0: ; %entry
		; GPRIDX-NEXT: s_mov_b32 s1, s3
		; GPRIDX-NEXT: s_mov_b32 s3, s5
		; GPRIDX-NEXT: s_mov_b32 s5, s7
		; GPRIDX-NEXT: s_mov_b32 s7, s9
		; GPRIDX-NEXT: s_mov_b32 s9, s11
		; GPRIDX-NEXT: s_mov_b32 s11, s13
		; GPRIDX-NEXT: s_mov_b32 s0, s2
		; GPRIDX-NEXT: s_mov_b32 s2, s4
		; GPRIDX-NEXT: s_mov_b32 s4, s6
		; GPRIDX-NEXT: s_mov_b32 s6, s8
		; GPRIDX-NEXT: s_mov_b32 s8, s10
		; GPRIDX-NEXT: s_mov_b32 s10, s12
		; GPRIDX-NEXT: v_mov_b32_e32 v23, s11
		; GPRIDX-NEXT: v_mov_b32_e32 v12, s0
		; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
		; GPRIDX-NEXT: v_mov_b32_e32 v13, s1
		; GPRIDX-NEXT: v_cndmask_b32_e32 v12, v12, v0, vcc
		; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 1, v1
		; GPRIDX-NEXT: v_mov_b32_e32 v14, s2
		; GPRIDX-NEXT: v_cndmask_b32_e32 v13, v13, v0, vcc
		; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 2, v1
		; GPRIDX-NEXT: v_mov_b32_e32 v15, s3
		; GPRIDX-NEXT: v_cndmask_b32_e32 v2, v14, v0, vcc
		; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 3, v1
		; GPRIDX-NEXT: v_mov_b32_e32 v16, s4
		; GPRIDX-NEXT: v_cndmask_b32_e32 v3, v15, v0, vcc
		; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 4, v1
		; GPRIDX-NEXT: v_mov_b32_e32 v17, s5
		; GPRIDX-NEXT: v_cndmask_b32_e32 v4, v16, v0, vcc
		; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 5, v1
		; GPRIDX-NEXT: v_mov_b32_e32 v22, s10
		; GPRIDX-NEXT: v_mov_b32_e32 v21, s9
		; GPRIDX-NEXT: v_mov_b32_e32 v20, s8
		; GPRIDX-NEXT: v_mov_b32_e32 v19, s7
		; GPRIDX-NEXT: v_mov_b32_e32 v18, s6
		; GPRIDX-NEXT: v_cndmask_b32_e32 v5, v17, v0, vcc
		; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 7, v1
		; GPRIDX-NEXT: v_cmp_eq_u32_e64 s[0:1], 8, v1
		; GPRIDX-NEXT: v_cmp_eq_u32_e64 s[2:3], 9, v1
		; GPRIDX-NEXT: v_cmp_eq_u32_e64 s[4:5], 10, v1
		; GPRIDX-NEXT: v_cmp_eq_u32_e64 s[6:7], 11, v1
		; GPRIDX-NEXT: v_cmp_eq_u32_e64 s[8:9], 6, v1
		; GPRIDX-NEXT: v_cndmask_b32_e64 v6, v18, v0, s[8:9]
		; GPRIDX-NEXT: v_cndmask_b32_e32 v7, v19, v0, vcc
		; GPRIDX-NEXT: v_cndmask_b32_e64 v8, v20, v0, s[0:1]
		; GPRIDX-NEXT: v_cndmask_b32_e64 v9, v21, v0, s[2:3]
		; GPRIDX-NEXT: v_cndmask_b32_e64 v10, v22, v0, s[4:5]
		; GPRIDX-NEXT: v_cndmask_b32_e64 v11, v23, v0, s[6:7]
		; GPRIDX-NEXT: v_mov_b32_e32 v0, v12
		; GPRIDX-NEXT: v_mov_b32_e32 v1, v13
		; GPRIDX-NEXT: ; return to shader part epilog
		;
		; GFX10-LABEL: dyn_insertelement_v12f32_s_v_v:
		; GFX10: ; %bb.0: ; %entry
		; GFX10-NEXT: s_mov_b32 s1, s3
		; GFX10-NEXT: s_mov_b32 s3, s5
		; GFX10-NEXT: s_mov_b32 s5, s7
		; GFX10-NEXT: s_mov_b32 s7, s9
		; GFX10-NEXT: s_mov_b32 s9, s11
		; GFX10-NEXT: s_mov_b32 s11, s13
		; GFX10-NEXT: s_mov_b32 s0, s2
		; GFX10-NEXT: s_mov_b32 s2, s4
		; GFX10-NEXT: s_mov_b32 s4, s6
		; GFX10-NEXT: s_mov_b32 s6, s8
		; GFX10-NEXT: s_mov_b32 s8, s10
		; GFX10-NEXT: s_mov_b32 s10, s12
		; GFX10-NEXT: v_mov_b32_e32 v23, s11
		; GFX10-NEXT: v_mov_b32_e32 v12, s0
		; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
		; GFX10-NEXT: v_mov_b32_e32 v13, s1
		; GFX10-NEXT: v_mov_b32_e32 v14, s2
		; GFX10-NEXT: v_mov_b32_e32 v15, s3
		; GFX10-NEXT: v_mov_b32_e32 v16, s4
		; GFX10-NEXT: v_cndmask_b32_e32 v12, v12, v0, vcc_lo
		; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v1
		; GFX10-NEXT: v_mov_b32_e32 v17, s5
		; GFX10-NEXT: v_mov_b32_e32 v18, s6
		; GFX10-NEXT: v_mov_b32_e32 v19, s7
		; GFX10-NEXT: v_mov_b32_e32 v20, s8
		; GFX10-NEXT: v_cndmask_b32_e32 v13, v13, v0, vcc_lo
		; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v1
		; GFX10-NEXT: v_mov_b32_e32 v21, s9
		; GFX10-NEXT: v_mov_b32_e32 v22, s10
		; GFX10-NEXT: v_cndmask_b32_e32 v2, v14, v0, vcc_lo
		; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v1
		; GFX10-NEXT: v_cndmask_b32_e32 v3, v15, v0, vcc_lo
		; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v1
		; GFX10-NEXT: v_cndmask_b32_e32 v4, v16, v0, vcc_lo
		; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v1
		; GFX10-NEXT: v_cndmask_b32_e32 v5, v17, v0, vcc_lo
		; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v1
		; GFX10-NEXT: v_cndmask_b32_e32 v6, v18, v0, vcc_lo
		; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v1
		; GFX10-NEXT: v_cndmask_b32_e32 v7, v19, v0, vcc_lo
		; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 8, v1
		; GFX10-NEXT: v_cndmask_b32_e32 v8, v20, v0, vcc_lo
		; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 9, v1
		; GFX10-NEXT: v_cndmask_b32_e32 v9, v21, v0, vcc_lo
		; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 10, v1
		; GFX10-NEXT: v_cndmask_b32_e32 v10, v22, v0, vcc_lo
		; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 11, v1
		; GFX10-NEXT: v_mov_b32_e32 v1, v13
		; GFX10-NEXT: v_cndmask_b32_e32 v11, v23, v0, vcc_lo
		; GFX10-NEXT: v_mov_b32_e32 v0, v12
		; GFX10-NEXT: ; return to shader part epilog
		;
		; GFX11-LABEL: dyn_insertelement_v12f32_s_v_v:
		; GFX11: ; %bb.0: ; %entry
		; GFX11-NEXT: s_mov_b32 s1, s3
		; GFX11-NEXT: s_mov_b32 s3, s5
		; GFX11-NEXT: s_mov_b32 s5, s7
		; GFX11-NEXT: s_mov_b32 s7, s9
		; GFX11-NEXT: s_mov_b32 s9, s11
		; GFX11-NEXT: s_mov_b32 s11, s13
		; GFX11-NEXT: s_mov_b32 s0, s2
		; GFX11-NEXT: s_mov_b32 s2, s4
		; GFX11-NEXT: s_mov_b32 s4, s6
		; GFX11-NEXT: s_mov_b32 s6, s8
		; GFX11-NEXT: s_mov_b32 s8, s10
		; GFX11-NEXT: s_mov_b32 s10, s12
		; GFX11-NEXT: v_dual_mov_b32 v23, s11 :: v_dual_mov_b32 v22, s10
		; GFX11-NEXT: v_dual_mov_b32 v13, s1 :: v_dual_mov_b32 v12, s0
		; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
		; GFX11-NEXT: v_dual_mov_b32 v15, s3 :: v_dual_mov_b32 v14, s2
		; GFX11-NEXT: v_dual_mov_b32 v17, s5 :: v_dual_mov_b32 v16, s4
		; GFX11-NEXT: v_cndmask_b32_e32 v12, v12, v0, vcc_lo
		; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v1
		; GFX11-NEXT: v_dual_mov_b32 v19, s7 :: v_dual_mov_b32 v18, s6
		; GFX11-NEXT: v_dual_mov_b32 v21, s9 :: v_dual_mov_b32 v20, s8
		; GFX11-NEXT: v_cndmask_b32_e32 v13, v13, v0, vcc_lo
		; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v1
		; GFX11-NEXT: v_cndmask_b32_e32 v2, v14, v0, vcc_lo
		; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v1
		; GFX11-NEXT: v_cndmask_b32_e32 v3, v15, v0, vcc_lo
		; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v1
		; GFX11-NEXT: v_cndmask_b32_e32 v4, v16, v0, vcc_lo
		; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v1
		; GFX11-NEXT: v_cndmask_b32_e32 v5, v17, v0, vcc_lo
		; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v1
		; GFX11-NEXT: v_cndmask_b32_e32 v6, v18, v0, vcc_lo
		; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v1
		; GFX11-NEXT: v_cndmask_b32_e32 v7, v19, v0, vcc_lo
		; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 8, v1
		; GFX11-NEXT: v_cndmask_b32_e32 v8, v20, v0, vcc_lo
		; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 9, v1
		; GFX11-NEXT: v_cndmask_b32_e32 v9, v21, v0, vcc_lo
		; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 10, v1
		; GFX11-NEXT: v_cndmask_b32_e32 v10, v22, v0, vcc_lo
		; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 11, v1
		; GFX11-NEXT: v_mov_b32_e32 v1, v13
		; GFX11-NEXT: v_dual_cndmask_b32 v11, v23, v0 :: v_dual_mov_b32 v0, v12
		; GFX11-NEXT: ; return to shader part epilog
		entry:
		%insert = insertelement <12 x float> %vec, float %val, i32 %idx
		ret <12 x float> %insert
		}

		define amdgpu_ps <12 x float> @dyn_insertelement_v12f32_v_v_s(<12 x float> %vec, float %val, i32 inreg %idx) {
		; GPRIDX-LABEL: dyn_insertelement_v12f32_v_v_s:
		; GPRIDX: ; %bb.0: ; %entry
		; GPRIDX-NEXT: s_set_gpr_idx_on s2, gpr_idx(DST)
		; GPRIDX-NEXT: v_mov_b32_e32 v0, v12
		; GPRIDX-NEXT: s_set_gpr_idx_off
		; GPRIDX-NEXT: ; return to shader part epilog
		;
		; GFX10PLUS-LABEL: dyn_insertelement_v12f32_v_v_s:
		; GFX10PLUS: ; %bb.0: ; %entry
		; GFX10PLUS-NEXT: s_mov_b32 m0, s2
		; GFX10PLUS-NEXT: v_movreld_b32_e32 v0, v12
		; GFX10PLUS-NEXT: ; return to shader part epilog
		entry:
		%insert = insertelement <12 x float> %vec, float %val, i32 %idx
		ret <12 x float> %insert
		}

		define amdgpu_ps <12 x float> @dyn_insertelement_v12f32_v_v_v(<12 x float> %vec, float %val, i32 %idx) {
		; GPRIDX-LABEL: dyn_insertelement_v12f32_v_v_v:
		; GPRIDX: ; %bb.0: ; %entry
		; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 0, v13
		; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v12, vcc
		; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 1, v13
		; GPRIDX-NEXT: v_cndmask_b32_e32 v1, v1, v12, vcc
		; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 2, v13
		; GPRIDX-NEXT: v_cndmask_b32_e32 v2, v2, v12, vcc
		; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 3, v13
		; GPRIDX-NEXT: v_cndmask_b32_e32 v3, v3, v12, vcc
		; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 4, v13
		; GPRIDX-NEXT: v_cndmask_b32_e32 v4, v4, v12, vcc
		; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 5, v13
		; GPRIDX-NEXT: v_cndmask_b32_e32 v5, v5, v12, vcc
		; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 6, v13
		; GPRIDX-NEXT: v_cndmask_b32_e32 v6, v6, v12, vcc
		; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 7, v13
		; GPRIDX-NEXT: v_cndmask_b32_e32 v7, v7, v12, vcc
		; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 8, v13
		; GPRIDX-NEXT: v_cndmask_b32_e32 v8, v8, v12, vcc
		; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 9, v13
		; GPRIDX-NEXT: v_cndmask_b32_e32 v9, v9, v12, vcc
		; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 10, v13
		; GPRIDX-NEXT: v_cndmask_b32_e32 v10, v10, v12, vcc
		; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 11, v13
		; GPRIDX-NEXT: v_cndmask_b32_e32 v11, v11, v12, vcc
		; GPRIDX-NEXT: ; return to shader part epilog
		;
		; GFX10PLUS-LABEL: dyn_insertelement_v12f32_v_v_v:
		; GFX10PLUS: ; %bb.0: ; %entry
		; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v13
		; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v12, vcc_lo
		; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v13
		; GFX10PLUS-NEXT: v_cndmask_b32_e32 v1, v1, v12, vcc_lo
		; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v13
		; GFX10PLUS-NEXT: v_cndmask_b32_e32 v2, v2, v12, vcc_lo
		; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v13
		; GFX10PLUS-NEXT: v_cndmask_b32_e32 v3, v3, v12, vcc_lo
		; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v13
		; GFX10PLUS-NEXT: v_cndmask_b32_e32 v4, v4, v12, vcc_lo
		; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v13
		; GFX10PLUS-NEXT: v_cndmask_b32_e32 v5, v5, v12, vcc_lo
		; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v13
		; GFX10PLUS-NEXT: v_cndmask_b32_e32 v6, v6, v12, vcc_lo
		; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v13
		; GFX10PLUS-NEXT: v_cndmask_b32_e32 v7, v7, v12, vcc_lo
		; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 8, v13
		; GFX10PLUS-NEXT: v_cndmask_b32_e32 v8, v8, v12, vcc_lo
		; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 9, v13
		; GFX10PLUS-NEXT: v_cndmask_b32_e32 v9, v9, v12, vcc_lo
		; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 10, v13
		; GFX10PLUS-NEXT: v_cndmask_b32_e32 v10, v10, v12, vcc_lo
		; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 11, v13
		; GFX10PLUS-NEXT: v_cndmask_b32_e32 v11, v11, v12, vcc_lo
		; GFX10PLUS-NEXT: ; return to shader part epilog
		entry:
		%insert = insertelement <12 x float> %vec, float %val, i32 %idx
		ret <12 x float> %insert
		}

define amdgpu_ps <16 x i32> @dyn_insertelement_v16i32_s_s_s(<16 x i32> inreg %vec, i32 inreg %val, i32 inreg %idx) {		define amdgpu_ps <16 x i32> @dyn_insertelement_v16i32_s_s_s(<16 x i32> inreg %vec, i32 inreg %val, i32 inreg %idx) {
; GPRIDX-LABEL: dyn_insertelement_v16i32_s_s_s:		; GPRIDX-LABEL: dyn_insertelement_v16i32_s_s_s:
; GPRIDX: ; %bb.0: ; %entry		; GPRIDX: ; %bb.0: ; %entry
; GPRIDX-NEXT: s_mov_b32 s0, s2		; GPRIDX-NEXT: s_mov_b32 s0, s2
; GPRIDX-NEXT: s_mov_b32 s1, s3		; GPRIDX-NEXT: s_mov_b32 s1, s3
; GPRIDX-NEXT: s_mov_b32 s2, s4		; GPRIDX-NEXT: s_mov_b32 s2, s4
; GPRIDX-NEXT: s_mov_b32 s3, s5		; GPRIDX-NEXT: s_mov_b32 s3, s5
; GPRIDX-NEXT: s_mov_b32 s4, s6		; GPRIDX-NEXT: s_mov_b32 s4, s6
▲ Show 20 Lines • Show All 2,559 Lines • ▼ Show 20 Lines
entry:		entry:
%insert = insertelement <5 x double> %vec, double %val, i32 %idx		%insert = insertelement <5 x double> %vec, double %val, i32 %idx
ret <5 x double> %insert		ret <5 x double> %insert
}		}

define amdgpu_ps <5 x double> @dyn_insertelement_v5f64_s_v_s(<5 x double> inreg %vec, double %val, i32 inreg %idx) {		define amdgpu_ps <5 x double> @dyn_insertelement_v5f64_s_v_s(<5 x double> inreg %vec, double %val, i32 inreg %idx) {
; GPRIDX-LABEL: dyn_insertelement_v5f64_s_v_s:		; GPRIDX-LABEL: dyn_insertelement_v5f64_s_v_s:
; GPRIDX: ; %bb.0: ; %entry		; GPRIDX: ; %bb.0: ; %entry
; GPRIDX-NEXT: s_mov_b32 s0, s2
; GPRIDX-NEXT: s_mov_b32 s1, s3		; GPRIDX-NEXT: s_mov_b32 s1, s3
; GPRIDX-NEXT: s_mov_b32 s2, s4
; GPRIDX-NEXT: s_mov_b32 s3, s5		; GPRIDX-NEXT: s_mov_b32 s3, s5
; GPRIDX-NEXT: s_mov_b32 s4, s6
; GPRIDX-NEXT: s_mov_b32 s5, s7		; GPRIDX-NEXT: s_mov_b32 s5, s7
; GPRIDX-NEXT: s_mov_b32 s6, s8
; GPRIDX-NEXT: s_mov_b32 s7, s9		; GPRIDX-NEXT: s_mov_b32 s7, s9
; GPRIDX-NEXT: s_mov_b32 s8, s10
; GPRIDX-NEXT: s_mov_b32 s9, s11		; GPRIDX-NEXT: s_mov_b32 s9, s11
; GPRIDX-NEXT: v_mov_b32_e32 v17, s15		; GPRIDX-NEXT: s_mov_b32 s0, s2
; GPRIDX-NEXT: v_mov_b32_e32 v16, s14		; GPRIDX-NEXT: s_mov_b32 s2, s4
; GPRIDX-NEXT: v_mov_b32_e32 v15, s13		; GPRIDX-NEXT: s_mov_b32 s4, s6
; GPRIDX-NEXT: v_mov_b32_e32 v14, s12		; GPRIDX-NEXT: s_mov_b32 s6, s8
; GPRIDX-NEXT: v_mov_b32_e32 v13, s11		; GPRIDX-NEXT: s_mov_b32 s8, s10
; GPRIDX-NEXT: v_mov_b32_e32 v12, s10
; GPRIDX-NEXT: v_mov_b32_e32 v11, s9		; GPRIDX-NEXT: v_mov_b32_e32 v11, s9
; GPRIDX-NEXT: v_mov_b32_e32 v10, s8
; GPRIDX-NEXT: v_mov_b32_e32 v9, s7
; GPRIDX-NEXT: v_mov_b32_e32 v8, s6
; GPRIDX-NEXT: v_mov_b32_e32 v7, s5
; GPRIDX-NEXT: v_mov_b32_e32 v6, s4
; GPRIDX-NEXT: v_mov_b32_e32 v5, s3
; GPRIDX-NEXT: v_mov_b32_e32 v4, s2
; GPRIDX-NEXT: v_mov_b32_e32 v3, s1		; GPRIDX-NEXT: v_mov_b32_e32 v3, s1
; GPRIDX-NEXT: v_mov_b32_e32 v2, s0		; GPRIDX-NEXT: v_mov_b32_e32 v2, s0
; GPRIDX-NEXT: v_cmp_eq_u32_e64 vcc, s12, 0		; GPRIDX-NEXT: v_cmp_eq_u32_e64 vcc, s12, 0
; GPRIDX-NEXT: v_cmp_eq_u32_e64 s[0:1], s12, 1		; GPRIDX-NEXT: v_mov_b32_e32 v5, s3
; GPRIDX-NEXT: v_cmp_eq_u32_e64 s[2:3], s12, 3		; GPRIDX-NEXT: v_mov_b32_e32 v4, s2
; GPRIDX-NEXT: v_cmp_eq_u32_e64 s[4:5], s12, 4
; GPRIDX-NEXT: v_cmp_eq_u32_e64 s[6:7], s12, 2
; GPRIDX-NEXT: v_cndmask_b32_e32 v2, v2, v0, vcc		; GPRIDX-NEXT: v_cndmask_b32_e32 v2, v2, v0, vcc
; GPRIDX-NEXT: v_cndmask_b32_e64 v4, v4, v0, s[0:1]
; GPRIDX-NEXT: v_cndmask_b32_e64 v6, v6, v0, s[6:7]
; GPRIDX-NEXT: v_cndmask_b32_e64 v8, v8, v0, s[2:3]
; GPRIDX-NEXT: v_cndmask_b32_e64 v0, v10, v0, s[4:5]
; GPRIDX-NEXT: v_cndmask_b32_e32 v3, v3, v1, vcc		; GPRIDX-NEXT: v_cndmask_b32_e32 v3, v3, v1, vcc
; GPRIDX-NEXT: v_cndmask_b32_e64 v5, v5, v1, s[0:1]		; GPRIDX-NEXT: v_cmp_eq_u32_e64 vcc, s12, 1
; GPRIDX-NEXT: v_cndmask_b32_e64 v7, v7, v1, s[6:7]		; GPRIDX-NEXT: v_mov_b32_e32 v7, s5
; GPRIDX-NEXT: v_cndmask_b32_e64 v9, v9, v1, s[2:3]		; GPRIDX-NEXT: v_mov_b32_e32 v6, s4
; GPRIDX-NEXT: v_cndmask_b32_e64 v1, v11, v1, s[4:5]		; GPRIDX-NEXT: v_cndmask_b32_e32 v4, v4, v0, vcc
		; GPRIDX-NEXT: v_cndmask_b32_e32 v5, v5, v1, vcc
		; GPRIDX-NEXT: v_cmp_eq_u32_e64 vcc, s12, 2
		; GPRIDX-NEXT: v_mov_b32_e32 v9, s7
		; GPRIDX-NEXT: v_mov_b32_e32 v8, s6
		; GPRIDX-NEXT: v_cndmask_b32_e32 v6, v6, v0, vcc
		; GPRIDX-NEXT: v_cndmask_b32_e32 v7, v7, v1, vcc
		; GPRIDX-NEXT: v_cmp_eq_u32_e64 vcc, s12, 3
		; GPRIDX-NEXT: v_mov_b32_e32 v10, s8
		; GPRIDX-NEXT: v_cndmask_b32_e32 v8, v8, v0, vcc
		; GPRIDX-NEXT: v_cndmask_b32_e32 v9, v9, v1, vcc
		; GPRIDX-NEXT: v_cmp_eq_u32_e64 vcc, s12, 4
		; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v10, v0, vcc
		; GPRIDX-NEXT: v_cndmask_b32_e32 v1, v11, v1, vcc
; GPRIDX-NEXT: v_readfirstlane_b32 s0, v2		; GPRIDX-NEXT: v_readfirstlane_b32 s0, v2
; GPRIDX-NEXT: v_readfirstlane_b32 s1, v3		; GPRIDX-NEXT: v_readfirstlane_b32 s1, v3
; GPRIDX-NEXT: v_readfirstlane_b32 s2, v4		; GPRIDX-NEXT: v_readfirstlane_b32 s2, v4
; GPRIDX-NEXT: v_readfirstlane_b32 s3, v5		; GPRIDX-NEXT: v_readfirstlane_b32 s3, v5
; GPRIDX-NEXT: v_readfirstlane_b32 s4, v6		; GPRIDX-NEXT: v_readfirstlane_b32 s4, v6
; GPRIDX-NEXT: v_readfirstlane_b32 s5, v7		; GPRIDX-NEXT: v_readfirstlane_b32 s5, v7
; GPRIDX-NEXT: v_readfirstlane_b32 s6, v8		; GPRIDX-NEXT: v_readfirstlane_b32 s6, v8
; GPRIDX-NEXT: v_readfirstlane_b32 s7, v9		; GPRIDX-NEXT: v_readfirstlane_b32 s7, v9
; GPRIDX-NEXT: v_readfirstlane_b32 s8, v0		; GPRIDX-NEXT: v_readfirstlane_b32 s8, v0
; GPRIDX-NEXT: v_readfirstlane_b32 s9, v1		; GPRIDX-NEXT: v_readfirstlane_b32 s9, v1
; GPRIDX-NEXT: ; return to shader part epilog		; GPRIDX-NEXT: ; return to shader part epilog
;		;
; GFX10-LABEL: dyn_insertelement_v5f64_s_v_s:		; GFX10-LABEL: dyn_insertelement_v5f64_s_v_s:
; GFX10: ; %bb.0: ; %entry		; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: s_mov_b32 s0, s2
; GFX10-NEXT: s_mov_b32 s1, s3		; GFX10-NEXT: s_mov_b32 s1, s3
; GFX10-NEXT: s_mov_b32 s2, s4
; GFX10-NEXT: s_mov_b32 s3, s5		; GFX10-NEXT: s_mov_b32 s3, s5
; GFX10-NEXT: s_mov_b32 s4, s6
; GFX10-NEXT: s_mov_b32 s5, s7		; GFX10-NEXT: s_mov_b32 s5, s7
; GFX10-NEXT: s_mov_b32 s6, s8
; GFX10-NEXT: s_mov_b32 s7, s9		; GFX10-NEXT: s_mov_b32 s7, s9
; GFX10-NEXT: s_mov_b32 s8, s10
; GFX10-NEXT: s_mov_b32 s9, s11		; GFX10-NEXT: s_mov_b32 s9, s11
; GFX10-NEXT: v_mov_b32_e32 v17, s15		; GFX10-NEXT: s_mov_b32 s0, s2
; GFX10-NEXT: v_mov_b32_e32 v16, s14		; GFX10-NEXT: s_mov_b32 s2, s4
; GFX10-NEXT: v_mov_b32_e32 v15, s13		; GFX10-NEXT: s_mov_b32 s4, s6
; GFX10-NEXT: v_mov_b32_e32 v14, s12		; GFX10-NEXT: s_mov_b32 s6, s8
; GFX10-NEXT: v_mov_b32_e32 v13, s11		; GFX10-NEXT: s_mov_b32 s8, s10
; GFX10-NEXT: v_mov_b32_e32 v12, s10
; GFX10-NEXT: v_mov_b32_e32 v11, s9		; GFX10-NEXT: v_mov_b32_e32 v11, s9
; GFX10-NEXT: v_mov_b32_e32 v10, s8		; GFX10-NEXT: v_mov_b32_e32 v10, s8
; GFX10-NEXT: v_mov_b32_e32 v9, s7		; GFX10-NEXT: v_mov_b32_e32 v9, s7
; GFX10-NEXT: v_mov_b32_e32 v8, s6		; GFX10-NEXT: v_mov_b32_e32 v8, s6
; GFX10-NEXT: v_mov_b32_e32 v7, s5		; GFX10-NEXT: v_mov_b32_e32 v7, s5
; GFX10-NEXT: v_mov_b32_e32 v6, s4		; GFX10-NEXT: v_mov_b32_e32 v6, s4
; GFX10-NEXT: v_mov_b32_e32 v5, s3		; GFX10-NEXT: v_mov_b32_e32 v5, s3
; GFX10-NEXT: v_mov_b32_e32 v4, s2		; GFX10-NEXT: v_mov_b32_e32 v4, s2
Show All 23 Lines
; GFX10-NEXT: v_readfirstlane_b32 s6, v8		; GFX10-NEXT: v_readfirstlane_b32 s6, v8
; GFX10-NEXT: v_readfirstlane_b32 s7, v9		; GFX10-NEXT: v_readfirstlane_b32 s7, v9
; GFX10-NEXT: v_readfirstlane_b32 s8, v0		; GFX10-NEXT: v_readfirstlane_b32 s8, v0
; GFX10-NEXT: v_readfirstlane_b32 s9, v1		; GFX10-NEXT: v_readfirstlane_b32 s9, v1
; GFX10-NEXT: ; return to shader part epilog		; GFX10-NEXT: ; return to shader part epilog
;		;
; GFX11-LABEL: dyn_insertelement_v5f64_s_v_s:		; GFX11-LABEL: dyn_insertelement_v5f64_s_v_s:
; GFX11: ; %bb.0: ; %entry		; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_mov_b32 s0, s2
; GFX11-NEXT: s_mov_b32 s1, s3		; GFX11-NEXT: s_mov_b32 s1, s3
; GFX11-NEXT: s_mov_b32 s2, s4
; GFX11-NEXT: s_mov_b32 s3, s5		; GFX11-NEXT: s_mov_b32 s3, s5
; GFX11-NEXT: s_mov_b32 s4, s6
; GFX11-NEXT: s_mov_b32 s5, s7		; GFX11-NEXT: s_mov_b32 s5, s7
; GFX11-NEXT: s_mov_b32 s6, s8
; GFX11-NEXT: s_mov_b32 s7, s9		; GFX11-NEXT: s_mov_b32 s7, s9
; GFX11-NEXT: s_mov_b32 s8, s10
; GFX11-NEXT: s_mov_b32 s9, s11		; GFX11-NEXT: s_mov_b32 s9, s11
; GFX11-NEXT: v_dual_mov_b32 v17, s15 :: v_dual_mov_b32 v16, s14		; GFX11-NEXT: s_mov_b32 s0, s2
; GFX11-NEXT: v_dual_mov_b32 v15, s13 :: v_dual_mov_b32 v14, s12		; GFX11-NEXT: s_mov_b32 s2, s4
; GFX11-NEXT: v_dual_mov_b32 v13, s11 :: v_dual_mov_b32 v12, s10		; GFX11-NEXT: s_mov_b32 s4, s6
		; GFX11-NEXT: s_mov_b32 s6, s8
		; GFX11-NEXT: s_mov_b32 s8, s10
; GFX11-NEXT: v_dual_mov_b32 v11, s9 :: v_dual_mov_b32 v10, s8		; GFX11-NEXT: v_dual_mov_b32 v11, s9 :: v_dual_mov_b32 v10, s8
; GFX11-NEXT: v_dual_mov_b32 v9, s7 :: v_dual_mov_b32 v8, s6		; GFX11-NEXT: v_dual_mov_b32 v9, s7 :: v_dual_mov_b32 v8, s6
; GFX11-NEXT: v_dual_mov_b32 v7, s5 :: v_dual_mov_b32 v6, s4		; GFX11-NEXT: v_dual_mov_b32 v7, s5 :: v_dual_mov_b32 v6, s4
; GFX11-NEXT: v_dual_mov_b32 v5, s3 :: v_dual_mov_b32 v4, s2		; GFX11-NEXT: v_dual_mov_b32 v5, s3 :: v_dual_mov_b32 v4, s2
; GFX11-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0		; GFX11-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
; GFX11-NEXT: v_cmp_eq_u32_e64 vcc_lo, s12, 0		; GFX11-NEXT: v_cmp_eq_u32_e64 vcc_lo, s12, 0
; GFX11-NEXT: v_cmp_eq_u32_e64 s0, s12, 1		; GFX11-NEXT: v_cmp_eq_u32_e64 s0, s12, 1
; GFX11-NEXT: v_cmp_eq_u32_e64 s1, s12, 4		; GFX11-NEXT: v_cmp_eq_u32_e64 s1, s12, 4
Show All 21 Lines
entry:		entry:
%insert = insertelement <5 x double> %vec, double %val, i32 %idx		%insert = insertelement <5 x double> %vec, double %val, i32 %idx
ret <5 x double> %insert		ret <5 x double> %insert
}		}

define amdgpu_ps <5 x double> @dyn_insertelement_v5f64_s_v_v(<5 x double> inreg %vec, double %val, i32 %idx) {		define amdgpu_ps <5 x double> @dyn_insertelement_v5f64_s_v_v(<5 x double> inreg %vec, double %val, i32 %idx) {
; GPRIDX-LABEL: dyn_insertelement_v5f64_s_v_v:		; GPRIDX-LABEL: dyn_insertelement_v5f64_s_v_v:
; GPRIDX: ; %bb.0: ; %entry		; GPRIDX: ; %bb.0: ; %entry
; GPRIDX-NEXT: s_mov_b32 s0, s2
; GPRIDX-NEXT: s_mov_b32 s1, s3		; GPRIDX-NEXT: s_mov_b32 s1, s3
; GPRIDX-NEXT: s_mov_b32 s2, s4
; GPRIDX-NEXT: s_mov_b32 s3, s5		; GPRIDX-NEXT: s_mov_b32 s3, s5
; GPRIDX-NEXT: s_mov_b32 s4, s6
; GPRIDX-NEXT: s_mov_b32 s5, s7		; GPRIDX-NEXT: s_mov_b32 s5, s7
; GPRIDX-NEXT: s_mov_b32 s6, s8
; GPRIDX-NEXT: s_mov_b32 s7, s9		; GPRIDX-NEXT: s_mov_b32 s7, s9
; GPRIDX-NEXT: s_mov_b32 s8, s10
; GPRIDX-NEXT: s_mov_b32 s9, s11		; GPRIDX-NEXT: s_mov_b32 s9, s11
; GPRIDX-NEXT: v_mov_b32_e32 v18, s15		; GPRIDX-NEXT: s_mov_b32 s0, s2
; GPRIDX-NEXT: v_mov_b32_e32 v17, s14		; GPRIDX-NEXT: s_mov_b32 s2, s4
; GPRIDX-NEXT: v_mov_b32_e32 v16, s13		; GPRIDX-NEXT: s_mov_b32 s4, s6
; GPRIDX-NEXT: v_mov_b32_e32 v15, s12		; GPRIDX-NEXT: s_mov_b32 s6, s8
; GPRIDX-NEXT: v_mov_b32_e32 v14, s11		; GPRIDX-NEXT: s_mov_b32 s8, s10
; GPRIDX-NEXT: v_mov_b32_e32 v13, s10
; GPRIDX-NEXT: v_mov_b32_e32 v12, s9		; GPRIDX-NEXT: v_mov_b32_e32 v12, s9
; GPRIDX-NEXT: v_mov_b32_e32 v11, s8
; GPRIDX-NEXT: v_mov_b32_e32 v10, s7
; GPRIDX-NEXT: v_mov_b32_e32 v9, s6
; GPRIDX-NEXT: v_mov_b32_e32 v8, s5
; GPRIDX-NEXT: v_mov_b32_e32 v7, s4
; GPRIDX-NEXT: v_mov_b32_e32 v6, s3
; GPRIDX-NEXT: v_mov_b32_e32 v5, s2
; GPRIDX-NEXT: v_mov_b32_e32 v4, s1		; GPRIDX-NEXT: v_mov_b32_e32 v4, s1
; GPRIDX-NEXT: v_mov_b32_e32 v3, s0		; GPRIDX-NEXT: v_mov_b32_e32 v3, s0
; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2		; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2
; GPRIDX-NEXT: v_cmp_eq_u32_e64 s[0:1], 2, v2		; GPRIDX-NEXT: v_mov_b32_e32 v6, s3
; GPRIDX-NEXT: v_cmp_eq_u32_e64 s[2:3], 3, v2		; GPRIDX-NEXT: v_mov_b32_e32 v5, s2
; GPRIDX-NEXT: v_cmp_eq_u32_e64 s[4:5], 4, v2
; GPRIDX-NEXT: v_cmp_eq_u32_e64 s[6:7], 1, v2
; GPRIDX-NEXT: v_cndmask_b32_e32 v3, v3, v0, vcc		; GPRIDX-NEXT: v_cndmask_b32_e32 v3, v3, v0, vcc
; GPRIDX-NEXT: v_cndmask_b32_e64 v2, v5, v0, s[6:7]
; GPRIDX-NEXT: v_cndmask_b32_e64 v5, v7, v0, s[0:1]
; GPRIDX-NEXT: v_cndmask_b32_e64 v7, v9, v0, s[2:3]
; GPRIDX-NEXT: v_cndmask_b32_e64 v0, v11, v0, s[4:5]
; GPRIDX-NEXT: v_cndmask_b32_e32 v4, v4, v1, vcc		; GPRIDX-NEXT: v_cndmask_b32_e32 v4, v4, v1, vcc
; GPRIDX-NEXT: v_cndmask_b32_e64 v6, v6, v1, s[6:7]		; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2
; GPRIDX-NEXT: v_cndmask_b32_e64 v8, v8, v1, s[0:1]		; GPRIDX-NEXT: v_mov_b32_e32 v8, s5
; GPRIDX-NEXT: v_cndmask_b32_e64 v9, v10, v1, s[2:3]		; GPRIDX-NEXT: v_mov_b32_e32 v7, s4
; GPRIDX-NEXT: v_cndmask_b32_e64 v1, v12, v1, s[4:5]		; GPRIDX-NEXT: v_cndmask_b32_e32 v5, v5, v0, vcc
		; GPRIDX-NEXT: v_cndmask_b32_e32 v6, v6, v1, vcc
		; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 2, v2
		; GPRIDX-NEXT: v_mov_b32_e32 v11, s8
		; GPRIDX-NEXT: v_mov_b32_e32 v10, s7
		; GPRIDX-NEXT: v_mov_b32_e32 v9, s6
		; GPRIDX-NEXT: v_cndmask_b32_e32 v7, v7, v0, vcc
		; GPRIDX-NEXT: v_cndmask_b32_e32 v8, v8, v1, vcc
		; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 3, v2
		; GPRIDX-NEXT: v_cmp_eq_u32_e64 s[0:1], 4, v2
		; GPRIDX-NEXT: v_cndmask_b32_e32 v9, v9, v0, vcc
		; GPRIDX-NEXT: v_cndmask_b32_e32 v2, v10, v1, vcc
		; GPRIDX-NEXT: v_cndmask_b32_e64 v0, v11, v0, s[0:1]
		; GPRIDX-NEXT: v_cndmask_b32_e64 v1, v12, v1, s[0:1]
; GPRIDX-NEXT: v_readfirstlane_b32 s0, v3		; GPRIDX-NEXT: v_readfirstlane_b32 s0, v3
; GPRIDX-NEXT: v_readfirstlane_b32 s1, v4		; GPRIDX-NEXT: v_readfirstlane_b32 s1, v4
; GPRIDX-NEXT: v_readfirstlane_b32 s2, v2		; GPRIDX-NEXT: v_readfirstlane_b32 s2, v5
; GPRIDX-NEXT: v_readfirstlane_b32 s3, v6		; GPRIDX-NEXT: v_readfirstlane_b32 s3, v6
; GPRIDX-NEXT: v_readfirstlane_b32 s4, v5		; GPRIDX-NEXT: v_readfirstlane_b32 s4, v7
; GPRIDX-NEXT: v_readfirstlane_b32 s5, v8		; GPRIDX-NEXT: v_readfirstlane_b32 s5, v8
; GPRIDX-NEXT: v_readfirstlane_b32 s6, v7		; GPRIDX-NEXT: v_readfirstlane_b32 s6, v9
; GPRIDX-NEXT: v_readfirstlane_b32 s7, v9		; GPRIDX-NEXT: v_readfirstlane_b32 s7, v2
; GPRIDX-NEXT: v_readfirstlane_b32 s8, v0		; GPRIDX-NEXT: v_readfirstlane_b32 s8, v0
; GPRIDX-NEXT: v_readfirstlane_b32 s9, v1		; GPRIDX-NEXT: v_readfirstlane_b32 s9, v1
; GPRIDX-NEXT: ; return to shader part epilog		; GPRIDX-NEXT: ; return to shader part epilog
;		;
; GFX10-LABEL: dyn_insertelement_v5f64_s_v_v:		; GFX10-LABEL: dyn_insertelement_v5f64_s_v_v:
; GFX10: ; %bb.0: ; %entry		; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: s_mov_b32 s0, s2
; GFX10-NEXT: s_mov_b32 s1, s3		; GFX10-NEXT: s_mov_b32 s1, s3
; GFX10-NEXT: s_mov_b32 s2, s4
; GFX10-NEXT: s_mov_b32 s3, s5		; GFX10-NEXT: s_mov_b32 s3, s5
; GFX10-NEXT: s_mov_b32 s4, s6
; GFX10-NEXT: s_mov_b32 s5, s7		; GFX10-NEXT: s_mov_b32 s5, s7
; GFX10-NEXT: s_mov_b32 s6, s8
; GFX10-NEXT: s_mov_b32 s7, s9		; GFX10-NEXT: s_mov_b32 s7, s9
; GFX10-NEXT: s_mov_b32 s8, s10
; GFX10-NEXT: s_mov_b32 s9, s11		; GFX10-NEXT: s_mov_b32 s9, s11
; GFX10-NEXT: v_mov_b32_e32 v18, s15		; GFX10-NEXT: s_mov_b32 s0, s2
; GFX10-NEXT: v_mov_b32_e32 v17, s14		; GFX10-NEXT: s_mov_b32 s2, s4
; GFX10-NEXT: v_mov_b32_e32 v16, s13		; GFX10-NEXT: s_mov_b32 s4, s6
; GFX10-NEXT: v_mov_b32_e32 v15, s12		; GFX10-NEXT: s_mov_b32 s6, s8
; GFX10-NEXT: v_mov_b32_e32 v14, s11		; GFX10-NEXT: s_mov_b32 s8, s10
; GFX10-NEXT: v_mov_b32_e32 v13, s10
; GFX10-NEXT: v_mov_b32_e32 v12, s9		; GFX10-NEXT: v_mov_b32_e32 v12, s9
; GFX10-NEXT: v_mov_b32_e32 v11, s8		; GFX10-NEXT: v_mov_b32_e32 v11, s8
; GFX10-NEXT: v_mov_b32_e32 v10, s7		; GFX10-NEXT: v_mov_b32_e32 v10, s7
; GFX10-NEXT: v_mov_b32_e32 v9, s6		; GFX10-NEXT: v_mov_b32_e32 v9, s6
; GFX10-NEXT: v_mov_b32_e32 v8, s5		; GFX10-NEXT: v_mov_b32_e32 v8, s5
; GFX10-NEXT: v_mov_b32_e32 v7, s4		; GFX10-NEXT: v_mov_b32_e32 v7, s4
; GFX10-NEXT: v_mov_b32_e32 v6, s3		; GFX10-NEXT: v_mov_b32_e32 v6, s3
; GFX10-NEXT: v_mov_b32_e32 v5, s2		; GFX10-NEXT: v_mov_b32_e32 v5, s2
Show All 23 Lines
; GFX10-NEXT: v_readfirstlane_b32 s6, v8		; GFX10-NEXT: v_readfirstlane_b32 s6, v8
; GFX10-NEXT: v_readfirstlane_b32 s7, v9		; GFX10-NEXT: v_readfirstlane_b32 s7, v9
; GFX10-NEXT: v_readfirstlane_b32 s8, v0		; GFX10-NEXT: v_readfirstlane_b32 s8, v0
; GFX10-NEXT: v_readfirstlane_b32 s9, v1		; GFX10-NEXT: v_readfirstlane_b32 s9, v1
; GFX10-NEXT: ; return to shader part epilog		; GFX10-NEXT: ; return to shader part epilog
;		;
; GFX11-LABEL: dyn_insertelement_v5f64_s_v_v:		; GFX11-LABEL: dyn_insertelement_v5f64_s_v_v:
; GFX11: ; %bb.0: ; %entry		; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_mov_b32 s0, s2
; GFX11-NEXT: s_mov_b32 s1, s3		; GFX11-NEXT: s_mov_b32 s1, s3
; GFX11-NEXT: s_mov_b32 s2, s4
; GFX11-NEXT: s_mov_b32 s3, s5		; GFX11-NEXT: s_mov_b32 s3, s5
; GFX11-NEXT: s_mov_b32 s4, s6
; GFX11-NEXT: s_mov_b32 s5, s7		; GFX11-NEXT: s_mov_b32 s5, s7
; GFX11-NEXT: s_mov_b32 s6, s8
; GFX11-NEXT: s_mov_b32 s7, s9		; GFX11-NEXT: s_mov_b32 s7, s9
; GFX11-NEXT: s_mov_b32 s8, s10
; GFX11-NEXT: s_mov_b32 s9, s11		; GFX11-NEXT: s_mov_b32 s9, s11
; GFX11-NEXT: v_dual_mov_b32 v18, s15 :: v_dual_mov_b32 v17, s14		; GFX11-NEXT: s_mov_b32 s0, s2
; GFX11-NEXT: v_dual_mov_b32 v16, s13 :: v_dual_mov_b32 v15, s12		; GFX11-NEXT: s_mov_b32 s2, s4
; GFX11-NEXT: v_dual_mov_b32 v14, s11 :: v_dual_mov_b32 v13, s10		; GFX11-NEXT: s_mov_b32 s4, s6
		; GFX11-NEXT: s_mov_b32 s6, s8
		; GFX11-NEXT: s_mov_b32 s8, s10
; GFX11-NEXT: v_dual_mov_b32 v12, s9 :: v_dual_mov_b32 v11, s8		; GFX11-NEXT: v_dual_mov_b32 v12, s9 :: v_dual_mov_b32 v11, s8
; GFX11-NEXT: v_dual_mov_b32 v10, s7 :: v_dual_mov_b32 v9, s6		; GFX11-NEXT: v_dual_mov_b32 v10, s7 :: v_dual_mov_b32 v9, s6
; GFX11-NEXT: v_dual_mov_b32 v8, s5 :: v_dual_mov_b32 v7, s4		; GFX11-NEXT: v_dual_mov_b32 v8, s5 :: v_dual_mov_b32 v7, s4
; GFX11-NEXT: v_dual_mov_b32 v6, s3 :: v_dual_mov_b32 v5, s2		; GFX11-NEXT: v_dual_mov_b32 v6, s3 :: v_dual_mov_b32 v5, s2
; GFX11-NEXT: v_dual_mov_b32 v4, s1 :: v_dual_mov_b32 v3, s0		; GFX11-NEXT: v_dual_mov_b32 v4, s1 :: v_dual_mov_b32 v3, s0
; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v2		; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v2
; GFX11-NEXT: v_cmp_eq_u32_e64 s0, 1, v2		; GFX11-NEXT: v_cmp_eq_u32_e64 s0, 1, v2
; GFX11-NEXT: v_cmp_eq_u32_e64 s1, 4, v2		; GFX11-NEXT: v_cmp_eq_u32_e64 s1, 4, v2
▲ Show 20 Lines • Show All 51 Lines • ▼ Show 20 Lines
; GPRIDX-NEXT: v_readfirstlane_b32 s7, v7		; GPRIDX-NEXT: v_readfirstlane_b32 s7, v7
; GPRIDX-NEXT: v_readfirstlane_b32 s8, v8		; GPRIDX-NEXT: v_readfirstlane_b32 s8, v8
; GPRIDX-NEXT: v_readfirstlane_b32 s9, v9		; GPRIDX-NEXT: v_readfirstlane_b32 s9, v9
; GPRIDX-NEXT: ; return to shader part epilog		; GPRIDX-NEXT: ; return to shader part epilog
;		;
; GFX10-LABEL: dyn_insertelement_v5f64_v_v_s:		; GFX10-LABEL: dyn_insertelement_v5f64_v_v_s:
; GFX10: ; %bb.0: ; %entry		; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: v_cmp_eq_u32_e64 vcc_lo, s2, 0		; GFX10-NEXT: v_cmp_eq_u32_e64 vcc_lo, s2, 0
		; GFX10-NEXT: v_cmp_eq_u32_e64 s0, s2, 4
; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v10, vcc_lo		; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v10, vcc_lo
; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v11, vcc_lo		; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v11, vcc_lo
; GFX10-NEXT: v_cmp_eq_u32_e64 vcc_lo, s2, 1		; GFX10-NEXT: v_cmp_eq_u32_e64 vcc_lo, s2, 1
		; GFX10-NEXT: v_cndmask_b32_e64 v8, v8, v10, s0
		; GFX10-NEXT: v_cndmask_b32_e64 v9, v9, v11, s0
; GFX10-NEXT: v_readfirstlane_b32 s0, v0		; GFX10-NEXT: v_readfirstlane_b32 s0, v0
; GFX10-NEXT: v_readfirstlane_b32 s1, v1		; GFX10-NEXT: v_readfirstlane_b32 s1, v1
; GFX10-NEXT: v_cndmask_b32_e32 v2, v2, v10, vcc_lo		; GFX10-NEXT: v_cndmask_b32_e32 v2, v2, v10, vcc_lo
; GFX10-NEXT: v_cndmask_b32_e32 v3, v3, v11, vcc_lo		; GFX10-NEXT: v_cndmask_b32_e32 v3, v3, v11, vcc_lo
; GFX10-NEXT: v_cmp_eq_u32_e64 vcc_lo, s2, 2		; GFX10-NEXT: v_cmp_eq_u32_e64 vcc_lo, s2, 2
		; GFX10-NEXT: v_readfirstlane_b32 s8, v8
		; GFX10-NEXT: v_readfirstlane_b32 s9, v9
; GFX10-NEXT: v_readfirstlane_b32 s3, v3		; GFX10-NEXT: v_readfirstlane_b32 s3, v3
; GFX10-NEXT: v_cndmask_b32_e32 v4, v4, v10, vcc_lo		; GFX10-NEXT: v_cndmask_b32_e32 v4, v4, v10, vcc_lo
; GFX10-NEXT: v_cndmask_b32_e32 v5, v5, v11, vcc_lo		; GFX10-NEXT: v_cndmask_b32_e32 v5, v5, v11, vcc_lo
; GFX10-NEXT: v_cmp_eq_u32_e64 vcc_lo, s2, 3		; GFX10-NEXT: v_cmp_eq_u32_e64 vcc_lo, s2, 3
		; GFX10-NEXT: v_readfirstlane_b32 s2, v2
; GFX10-NEXT: v_readfirstlane_b32 s4, v4		; GFX10-NEXT: v_readfirstlane_b32 s4, v4
; GFX10-NEXT: v_readfirstlane_b32 s5, v5		; GFX10-NEXT: v_readfirstlane_b32 s5, v5
; GFX10-NEXT: v_cndmask_b32_e32 v6, v6, v10, vcc_lo		; GFX10-NEXT: v_cndmask_b32_e32 v6, v6, v10, vcc_lo
; GFX10-NEXT: v_cndmask_b32_e32 v7, v7, v11, vcc_lo		; GFX10-NEXT: v_cndmask_b32_e32 v7, v7, v11, vcc_lo
; GFX10-NEXT: v_cmp_eq_u32_e64 vcc_lo, s2, 4
; GFX10-NEXT: v_readfirstlane_b32 s2, v2
; GFX10-NEXT: v_readfirstlane_b32 s6, v6		; GFX10-NEXT: v_readfirstlane_b32 s6, v6
; GFX10-NEXT: v_readfirstlane_b32 s7, v7		; GFX10-NEXT: v_readfirstlane_b32 s7, v7
; GFX10-NEXT: v_cndmask_b32_e32 v8, v8, v10, vcc_lo
; GFX10-NEXT: v_cndmask_b32_e32 v9, v9, v11, vcc_lo
; GFX10-NEXT: v_readfirstlane_b32 s8, v8
; GFX10-NEXT: v_readfirstlane_b32 s9, v9
; GFX10-NEXT: ; return to shader part epilog		; GFX10-NEXT: ; return to shader part epilog
;		;
; GFX11-LABEL: dyn_insertelement_v5f64_v_v_s:		; GFX11-LABEL: dyn_insertelement_v5f64_v_v_s:
; GFX11: ; %bb.0: ; %entry		; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: v_cmp_eq_u32_e64 vcc_lo, s2, 0		; GFX11-NEXT: v_cmp_eq_u32_e64 vcc_lo, s2, 0
; GFX11-NEXT: v_cmp_eq_u32_e64 s0, s2, 3		; GFX11-NEXT: v_cmp_eq_u32_e64 s0, s2, 2
; GFX11-NEXT: v_cmp_eq_u32_e64 s1, s2, 4		; GFX11-NEXT: v_cmp_eq_u32_e64 s1, s2, 4
; GFX11-NEXT: v_dual_cndmask_b32 v0, v0, v10 :: v_dual_cndmask_b32 v1, v1, v11		; GFX11-NEXT: v_dual_cndmask_b32 v0, v0, v10 :: v_dual_cndmask_b32 v1, v1, v11
; GFX11-NEXT: v_cmp_eq_u32_e64 vcc_lo, s2, 1		; GFX11-NEXT: v_cmp_eq_u32_e64 vcc_lo, s2, 1
; GFX11-NEXT: v_cndmask_b32_e64 v6, v6, v10, s0		; GFX11-NEXT: v_cndmask_b32_e64 v4, v4, v10, s0
; GFX11-NEXT: v_cndmask_b32_e64 v7, v7, v11, s0		; GFX11-NEXT: v_cndmask_b32_e64 v5, v5, v11, s0
; GFX11-NEXT: v_cndmask_b32_e64 v8, v8, v10, s1		; GFX11-NEXT: v_cndmask_b32_e64 v8, v8, v10, s1
; GFX11-NEXT: v_cndmask_b32_e64 v9, v9, v11, s1		; GFX11-NEXT: v_cndmask_b32_e64 v9, v9, v11, s1
; GFX11-NEXT: v_dual_cndmask_b32 v2, v2, v10 :: v_dual_cndmask_b32 v3, v3, v11		; GFX11-NEXT: v_dual_cndmask_b32 v2, v2, v10 :: v_dual_cndmask_b32 v3, v3, v11
; GFX11-NEXT: v_cmp_eq_u32_e64 vcc_lo, s2, 2		; GFX11-NEXT: v_cmp_eq_u32_e64 vcc_lo, s2, 3
; GFX11-NEXT: v_readfirstlane_b32 s0, v0		; GFX11-NEXT: v_readfirstlane_b32 s0, v0
; GFX11-NEXT: v_readfirstlane_b32 s1, v1		; GFX11-NEXT: v_readfirstlane_b32 s1, v1
; GFX11-NEXT: v_readfirstlane_b32 s2, v2		; GFX11-NEXT: v_readfirstlane_b32 s2, v2
; GFX11-NEXT: v_readfirstlane_b32 s3, v3		; GFX11-NEXT: v_readfirstlane_b32 s3, v3
; GFX11-NEXT: v_dual_cndmask_b32 v4, v4, v10 :: v_dual_cndmask_b32 v5, v5, v11		; GFX11-NEXT: v_dual_cndmask_b32 v6, v6, v10 :: v_dual_cndmask_b32 v7, v7, v11
; GFX11-NEXT: v_readfirstlane_b32 s6, v6
; GFX11-NEXT: v_readfirstlane_b32 s7, v7
; GFX11-NEXT: v_readfirstlane_b32 s8, v8
; GFX11-NEXT: v_readfirstlane_b32 s4, v4		; GFX11-NEXT: v_readfirstlane_b32 s4, v4
; GFX11-NEXT: v_readfirstlane_b32 s5, v5		; GFX11-NEXT: v_readfirstlane_b32 s5, v5
		; GFX11-NEXT: v_readfirstlane_b32 s8, v8
		; GFX11-NEXT: v_readfirstlane_b32 s6, v6
		; GFX11-NEXT: v_readfirstlane_b32 s7, v7
; GFX11-NEXT: v_readfirstlane_b32 s9, v9		; GFX11-NEXT: v_readfirstlane_b32 s9, v9
; GFX11-NEXT: ; return to shader part epilog		; GFX11-NEXT: ; return to shader part epilog
entry:		entry:
%insert = insertelement <5 x double> %vec, double %val, i32 %idx		%insert = insertelement <5 x double> %vec, double %val, i32 %idx
ret <5 x double> %insert		ret <5 x double> %insert
}		}

define amdgpu_ps <5 x double> @dyn_insertelement_v5f64_v_v_v(<5 x double> %vec, double %val, i32 %idx) {		define amdgpu_ps <5 x double> @dyn_insertelement_v5f64_v_v_v(<5 x double> %vec, double %val, i32 %idx) {
Show All 24 Lines
; GPRIDX-NEXT: v_readfirstlane_b32 s7, v7		; GPRIDX-NEXT: v_readfirstlane_b32 s7, v7
; GPRIDX-NEXT: v_readfirstlane_b32 s8, v8		; GPRIDX-NEXT: v_readfirstlane_b32 s8, v8
; GPRIDX-NEXT: v_readfirstlane_b32 s9, v9		; GPRIDX-NEXT: v_readfirstlane_b32 s9, v9
; GPRIDX-NEXT: ; return to shader part epilog		; GPRIDX-NEXT: ; return to shader part epilog
;		;
; GFX10-LABEL: dyn_insertelement_v5f64_v_v_v:		; GFX10-LABEL: dyn_insertelement_v5f64_v_v_v:
; GFX10: ; %bb.0: ; %entry		; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v12		; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v12
		; GFX10-NEXT: v_cmp_eq_u32_e64 s0, 4, v12
; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v10, vcc_lo		; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v10, vcc_lo
; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v11, vcc_lo		; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v11, vcc_lo
; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v12		; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v12
		; GFX10-NEXT: v_cndmask_b32_e64 v8, v8, v10, s0
		; GFX10-NEXT: v_cndmask_b32_e64 v9, v9, v11, s0
; GFX10-NEXT: v_readfirstlane_b32 s0, v0		; GFX10-NEXT: v_readfirstlane_b32 s0, v0
; GFX10-NEXT: v_readfirstlane_b32 s1, v1		; GFX10-NEXT: v_readfirstlane_b32 s1, v1
; GFX10-NEXT: v_cndmask_b32_e32 v2, v2, v10, vcc_lo		; GFX10-NEXT: v_cndmask_b32_e32 v2, v2, v10, vcc_lo
; GFX10-NEXT: v_cndmask_b32_e32 v3, v3, v11, vcc_lo		; GFX10-NEXT: v_cndmask_b32_e32 v3, v3, v11, vcc_lo
; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v12		; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v12
		; GFX10-NEXT: v_readfirstlane_b32 s8, v8
		; GFX10-NEXT: v_readfirstlane_b32 s9, v9
; GFX10-NEXT: v_readfirstlane_b32 s2, v2		; GFX10-NEXT: v_readfirstlane_b32 s2, v2
; GFX10-NEXT: v_readfirstlane_b32 s3, v3		; GFX10-NEXT: v_readfirstlane_b32 s3, v3
; GFX10-NEXT: v_cndmask_b32_e32 v4, v4, v10, vcc_lo		; GFX10-NEXT: v_cndmask_b32_e32 v4, v4, v10, vcc_lo
; GFX10-NEXT: v_cndmask_b32_e32 v5, v5, v11, vcc_lo		; GFX10-NEXT: v_cndmask_b32_e32 v5, v5, v11, vcc_lo
; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v12		; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v12
; GFX10-NEXT: v_readfirstlane_b32 s4, v4		; GFX10-NEXT: v_readfirstlane_b32 s4, v4
; GFX10-NEXT: v_readfirstlane_b32 s5, v5		; GFX10-NEXT: v_readfirstlane_b32 s5, v5
; GFX10-NEXT: v_cndmask_b32_e32 v6, v6, v10, vcc_lo		; GFX10-NEXT: v_cndmask_b32_e32 v6, v6, v10, vcc_lo
; GFX10-NEXT: v_cndmask_b32_e32 v7, v7, v11, vcc_lo		; GFX10-NEXT: v_cndmask_b32_e32 v7, v7, v11, vcc_lo
; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v12
; GFX10-NEXT: v_readfirstlane_b32 s6, v6		; GFX10-NEXT: v_readfirstlane_b32 s6, v6
; GFX10-NEXT: v_readfirstlane_b32 s7, v7		; GFX10-NEXT: v_readfirstlane_b32 s7, v7
; GFX10-NEXT: v_cndmask_b32_e32 v8, v8, v10, vcc_lo
; GFX10-NEXT: v_cndmask_b32_e32 v9, v9, v11, vcc_lo
; GFX10-NEXT: v_readfirstlane_b32 s8, v8
; GFX10-NEXT: v_readfirstlane_b32 s9, v9
; GFX10-NEXT: ; return to shader part epilog		; GFX10-NEXT: ; return to shader part epilog
;		;
; GFX11-LABEL: dyn_insertelement_v5f64_v_v_v:		; GFX11-LABEL: dyn_insertelement_v5f64_v_v_v:
; GFX11: ; %bb.0: ; %entry		; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v12		; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v12
; GFX11-NEXT: v_cmp_eq_u32_e64 s0, 3, v12		; GFX11-NEXT: v_cmp_eq_u32_e64 s0, 2, v12
; GFX11-NEXT: v_cmp_eq_u32_e64 s1, 4, v12		; GFX11-NEXT: v_cmp_eq_u32_e64 s1, 4, v12
; GFX11-NEXT: v_dual_cndmask_b32 v0, v0, v10 :: v_dual_cndmask_b32 v1, v1, v11		; GFX11-NEXT: v_dual_cndmask_b32 v0, v0, v10 :: v_dual_cndmask_b32 v1, v1, v11
; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v12		; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v12
; GFX11-NEXT: v_cndmask_b32_e64 v6, v6, v10, s0		; GFX11-NEXT: v_cndmask_b32_e64 v4, v4, v10, s0
; GFX11-NEXT: v_cndmask_b32_e64 v7, v7, v11, s0		; GFX11-NEXT: v_cndmask_b32_e64 v5, v5, v11, s0
; GFX11-NEXT: v_cndmask_b32_e64 v8, v8, v10, s1		; GFX11-NEXT: v_cndmask_b32_e64 v8, v8, v10, s1
; GFX11-NEXT: v_cndmask_b32_e64 v9, v9, v11, s1		; GFX11-NEXT: v_cndmask_b32_e64 v9, v9, v11, s1
; GFX11-NEXT: v_dual_cndmask_b32 v2, v2, v10 :: v_dual_cndmask_b32 v3, v3, v11		; GFX11-NEXT: v_dual_cndmask_b32 v2, v2, v10 :: v_dual_cndmask_b32 v3, v3, v11
; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v12		; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v12
; GFX11-NEXT: v_readfirstlane_b32 s0, v0		; GFX11-NEXT: v_readfirstlane_b32 s0, v0
; GFX11-NEXT: v_readfirstlane_b32 s1, v1		; GFX11-NEXT: v_readfirstlane_b32 s1, v1
; GFX11-NEXT: v_readfirstlane_b32 s2, v2		; GFX11-NEXT: v_readfirstlane_b32 s2, v2
; GFX11-NEXT: v_readfirstlane_b32 s3, v3		; GFX11-NEXT: v_readfirstlane_b32 s3, v3
; GFX11-NEXT: v_dual_cndmask_b32 v4, v4, v10 :: v_dual_cndmask_b32 v5, v5, v11		; GFX11-NEXT: v_dual_cndmask_b32 v6, v6, v10 :: v_dual_cndmask_b32 v7, v7, v11
; GFX11-NEXT: v_readfirstlane_b32 s6, v6
; GFX11-NEXT: v_readfirstlane_b32 s7, v7
; GFX11-NEXT: v_readfirstlane_b32 s8, v8
; GFX11-NEXT: v_readfirstlane_b32 s4, v4		; GFX11-NEXT: v_readfirstlane_b32 s4, v4
; GFX11-NEXT: v_readfirstlane_b32 s5, v5		; GFX11-NEXT: v_readfirstlane_b32 s5, v5
		; GFX11-NEXT: v_readfirstlane_b32 s8, v8
		; GFX11-NEXT: v_readfirstlane_b32 s6, v6
		; GFX11-NEXT: v_readfirstlane_b32 s7, v7
; GFX11-NEXT: v_readfirstlane_b32 s9, v9		; GFX11-NEXT: v_readfirstlane_b32 s9, v9
; GFX11-NEXT: ; return to shader part epilog		; GFX11-NEXT: ; return to shader part epilog
entry:		entry:
%insert = insertelement <5 x double> %vec, double %val, i32 %idx		%insert = insertelement <5 x double> %vec, double %val, i32 %idx
ret <5 x double> %insert		ret <5 x double> %insert
}		}

llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-concat-vectors.mir

Show First 20 Lines • Show All 682 Lines • ▼ Show 20 Lines	body: \|
bb.0:		bb.0:
liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15		liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15

; GCN-LABEL: name: test_concat_vectors_s_v6s64_s_v3s64_s_v3s64		; GCN-LABEL: name: test_concat_vectors_s_v6s64_s_v3s64_s_v3s64
; GCN: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15		; GCN: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GCN-NEXT: {{ $}}		; GCN-NEXT: {{ $}}
; GCN-NEXT: [[DEF:%[0-9]+]]:sgpr_192 = IMPLICIT_DEF		; GCN-NEXT: [[DEF:%[0-9]+]]:sgpr_192 = IMPLICIT_DEF
; GCN-NEXT: [[DEF1:%[0-9]+]]:sgpr_192 = IMPLICIT_DEF		; GCN-NEXT: [[DEF1:%[0-9]+]]:sgpr_192 = IMPLICIT_DEF
; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_512 = REG_SEQUENCE [[DEF]], %subreg.sub0_sub1_sub2_sub3_sub4_sub5, [[DEF1]], %subreg.sub6_sub7_sub8_sub9_sub10_sub11		; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_384 = REG_SEQUENCE [[DEF]], %subreg.sub0_sub1_sub2_sub3_sub4_sub5, [[DEF1]], %subreg.sub6_sub7_sub8_sub9_sub10_sub11
; GCN-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]]		; GCN-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
%0:sgpr(<3 x s64>) = G_IMPLICIT_DEF		%0:sgpr(<3 x s64>) = G_IMPLICIT_DEF
%1:sgpr(<3 x s64>) = G_IMPLICIT_DEF		%1:sgpr(<3 x s64>) = G_IMPLICIT_DEF
%2:sgpr(<6 x s64>) = G_CONCAT_VECTORS %0, %1		%2:sgpr(<6 x s64>) = G_CONCAT_VECTORS %0, %1
S_ENDPGM 0, implicit %2		S_ENDPGM 0, implicit %2
...		...

---		---
▲ Show 20 Lines • Show All 116 Lines • Show Last 20 Lines

llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-unmerge-values.mir

Show First 20 Lines • Show All 290 Lines • ▼ Show 20 Lines	bb.0:

; GCN-LABEL: name: test_unmerge_s_v3s32_s_v12s32		; GCN-LABEL: name: test_unmerge_s_v3s32_s_v12s32
; GCN: liveins: $sgpr0_sgpr1_sgpr2, $sgpr3_sgpr4_sgpr5, $sgpr6_sgpr7_sgpr8, $sgpr9_sgpr10_sgpr11		; GCN: liveins: $sgpr0_sgpr1_sgpr2, $sgpr3_sgpr4_sgpr5, $sgpr6_sgpr7_sgpr8, $sgpr9_sgpr10_sgpr11
; GCN-NEXT: {{ $}}		; GCN-NEXT: {{ $}}
; GCN-NEXT: [[COPY:%[0-9]+]]:sgpr_96 = COPY $sgpr0_sgpr1_sgpr2		; GCN-NEXT: [[COPY:%[0-9]+]]:sgpr_96 = COPY $sgpr0_sgpr1_sgpr2
; GCN-NEXT: [[COPY1:%[0-9]+]]:sgpr_96 = COPY $sgpr3_sgpr4_sgpr5		; GCN-NEXT: [[COPY1:%[0-9]+]]:sgpr_96 = COPY $sgpr3_sgpr4_sgpr5
; GCN-NEXT: [[COPY2:%[0-9]+]]:sgpr_96 = COPY $sgpr6_sgpr7_sgpr8		; GCN-NEXT: [[COPY2:%[0-9]+]]:sgpr_96 = COPY $sgpr6_sgpr7_sgpr8
; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_96 = COPY $sgpr9_sgpr10_sgpr11		; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_96 = COPY $sgpr9_sgpr10_sgpr11
; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_512_with_sub0_sub1_sub2 = REG_SEQUENCE [[COPY]], %subreg.sub0_sub1_sub2, [[COPY1]], %subreg.sub3_sub4_sub5, [[COPY2]], %subreg.sub6_sub7_sub8, [[COPY3]], %subreg.sub9_sub10_sub11		; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_384_with_sub0_sub1_sub2 = REG_SEQUENCE [[COPY]], %subreg.sub0_sub1_sub2, [[COPY1]], %subreg.sub3_sub4_sub5, [[COPY2]], %subreg.sub6_sub7_sub8, [[COPY3]], %subreg.sub9_sub10_sub11
; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_96 = COPY [[REG_SEQUENCE]].sub0_sub1_sub2		; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_96 = COPY [[REG_SEQUENCE]].sub0_sub1_sub2
; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_96 = COPY [[REG_SEQUENCE]].sub3_sub4_sub5		; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_96 = COPY [[REG_SEQUENCE]].sub3_sub4_sub5
; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_96 = COPY [[REG_SEQUENCE]].sub6_sub7_sub8		; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_96 = COPY [[REG_SEQUENCE]].sub6_sub7_sub8
; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_96 = COPY [[REG_SEQUENCE]].sub9_sub10_sub11		; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_96 = COPY [[REG_SEQUENCE]].sub9_sub10_sub11
; GCN-NEXT: $sgpr0_sgpr1_sgpr2 = COPY [[COPY4]]		; GCN-NEXT: $sgpr0_sgpr1_sgpr2 = COPY [[COPY4]]
; GCN-NEXT: $sgpr3_sgpr4_sgpr5 = COPY [[COPY5]]		; GCN-NEXT: $sgpr3_sgpr4_sgpr5 = COPY [[COPY5]]
; GCN-NEXT: $sgpr6_sgpr7_sgpr8 = COPY [[COPY6]]		; GCN-NEXT: $sgpr6_sgpr7_sgpr8 = COPY [[COPY6]]
; GCN-NEXT: $sgpr9_sgpr10_sgpr11 = COPY [[COPY7]]		; GCN-NEXT: $sgpr9_sgpr10_sgpr11 = COPY [[COPY7]]
Show All 19 Lines	body: \|
bb.0:		bb.0:
liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5, $vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11		liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5, $vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11

; GCN-LABEL: name: test_unmerge_v_v3s32_v_v12s32		; GCN-LABEL: name: test_unmerge_v_v3s32_v_v12s32
; GCN: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5, $vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11		; GCN: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5, $vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11
; GCN-NEXT: {{ $}}		; GCN-NEXT: {{ $}}
; GCN-NEXT: [[COPY:%[0-9]+]]:vreg_192 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5		; GCN-NEXT: [[COPY:%[0-9]+]]:vreg_192 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5
; GCN-NEXT: [[COPY1:%[0-9]+]]:vreg_192 = COPY $vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11		; GCN-NEXT: [[COPY1:%[0-9]+]]:vreg_192 = COPY $vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11
; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[COPY]], %subreg.sub0_sub1_sub2_sub3_sub4_sub5, [[COPY1]], %subreg.sub6_sub7_sub8_sub9_sub10_sub11		; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_384 = REG_SEQUENCE [[COPY]], %subreg.sub0_sub1_sub2_sub3_sub4_sub5, [[COPY1]], %subreg.sub6_sub7_sub8_sub9_sub10_sub11
; GCN-NEXT: [[COPY2:%[0-9]+]]:vreg_96 = COPY [[REG_SEQUENCE]].sub0_sub1_sub2		; GCN-NEXT: [[COPY2:%[0-9]+]]:vreg_96 = COPY [[REG_SEQUENCE]].sub0_sub1_sub2
; GCN-NEXT: [[COPY3:%[0-9]+]]:vreg_96 = COPY [[REG_SEQUENCE]].sub3_sub4_sub5		; GCN-NEXT: [[COPY3:%[0-9]+]]:vreg_96 = COPY [[REG_SEQUENCE]].sub3_sub4_sub5
; GCN-NEXT: [[COPY4:%[0-9]+]]:vreg_96 = COPY [[REG_SEQUENCE]].sub6_sub7_sub8		; GCN-NEXT: [[COPY4:%[0-9]+]]:vreg_96 = COPY [[REG_SEQUENCE]].sub6_sub7_sub8
; GCN-NEXT: [[COPY5:%[0-9]+]]:vreg_96 = COPY [[REG_SEQUENCE]].sub9_sub10_sub11		; GCN-NEXT: [[COPY5:%[0-9]+]]:vreg_96 = COPY [[REG_SEQUENCE]].sub9_sub10_sub11
; GCN-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY2]]		; GCN-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY2]]
; GCN-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY3]]		; GCN-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY3]]
; GCN-NEXT: $vgpr6_vgpr7_vgpr8 = COPY [[COPY4]]		; GCN-NEXT: $vgpr6_vgpr7_vgpr8 = COPY [[COPY4]]
; GCN-NEXT: $vgpr9_vgpr10_vgpr11 = COPY [[COPY5]]		; GCN-NEXT: $vgpr9_vgpr10_vgpr11 = COPY [[COPY5]]
Show All 10 Lines

llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.intersect_ray.ll

Show All 12 Lines
declare <4 x i32> @llvm.amdgcn.image.bvh.intersect.ray.i32.v4f16(i32, float, <3 x float>, <3 x half>, <3 x half>, <4 x i32>)		declare <4 x i32> @llvm.amdgcn.image.bvh.intersect.ray.i32.v4f16(i32, float, <3 x float>, <3 x half>, <3 x half>, <4 x i32>)
declare <4 x i32> @llvm.amdgcn.image.bvh.intersect.ray.i64.v4f32(i64, float, <3 x float>, <3 x float>, <3 x float>, <4 x i32>)		declare <4 x i32> @llvm.amdgcn.image.bvh.intersect.ray.i64.v4f32(i64, float, <3 x float>, <3 x float>, <3 x float>, <4 x i32>)
declare <4 x i32> @llvm.amdgcn.image.bvh.intersect.ray.i64.v4f16(i64, float, <3 x float>, <3 x half>, <3 x half>, <4 x i32>)		declare <4 x i32> @llvm.amdgcn.image.bvh.intersect.ray.i64.v4f16(i64, float, <3 x float>, <3 x half>, <3 x half>, <4 x i32>)
declare i32 @llvm.amdgcn.workitem.id.x()		declare i32 @llvm.amdgcn.workitem.id.x()

define amdgpu_ps <4 x float> @image_bvh_intersect_ray(i32 %node_ptr, float %ray_extent, <3 x float> %ray_origin, <3 x float> %ray_dir, <3 x float> %ray_inv_dir, <4 x i32> inreg %tdescr) {		define amdgpu_ps <4 x float> @image_bvh_intersect_ray(i32 %node_ptr, float %ray_extent, <3 x float> %ray_origin, <3 x float> %ray_dir, <3 x float> %ray_inv_dir, <4 x i32> inreg %tdescr) {
; GCN-LABEL: image_bvh_intersect_ray:		; GCN-LABEL: image_bvh_intersect_ray:
; GCN: ; %bb.0:		; GCN: ; %bb.0:
; GCN-NEXT: image_bvh_intersect_ray v[0:3], v[0:15], s[0:3]		; GCN-NEXT: image_bvh_intersect_ray v[0:3], v[0:10], s[0:3]
; GCN-NEXT: s_waitcnt vmcnt(0)		; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: ; return to shader part epilog		; GCN-NEXT: ; return to shader part epilog
; ERR: in function image_bvh_intersect_ray{{.*}}intrinsic not supported on subtarget		; ERR: in function image_bvh_intersect_ray{{.*}}intrinsic not supported on subtarget
%v = call <4 x i32> @llvm.amdgcn.image.bvh.intersect.ray.i32.v4f32(i32 %node_ptr, float %ray_extent, <3 x float> %ray_origin, <3 x float> %ray_dir, <3 x float> %ray_inv_dir, <4 x i32> %tdescr)		%v = call <4 x i32> @llvm.amdgcn.image.bvh.intersect.ray.i32.v4f32(i32 %node_ptr, float %ray_extent, <3 x float> %ray_origin, <3 x float> %ray_dir, <3 x float> %ray_inv_dir, <4 x i32> %tdescr)
%r = bitcast <4 x i32> %v to <4 x float>		%r = bitcast <4 x i32> %v to <4 x float>
ret <4 x float> %r		ret <4 x float> %r
}		}

define amdgpu_ps <4 x float> @image_bvh_intersect_ray_flat(i32 %node_ptr, float %ray_extent, float %ray_origin_x, float %ray_origin_y, float %ray_origin_z, float %ray_dir_x, float %ray_dir_y, float %ray_dir_z, float %ray_inv_dir_x, float %ray_inv_dir_y, float %ray_inv_dir_z, <4 x i32> inreg %tdescr) {		define amdgpu_ps <4 x float> @image_bvh_intersect_ray_flat(i32 %node_ptr, float %ray_extent, float %ray_origin_x, float %ray_origin_y, float %ray_origin_z, float %ray_dir_x, float %ray_dir_y, float %ray_dir_z, float %ray_inv_dir_x, float %ray_inv_dir_y, float %ray_inv_dir_z, <4 x i32> inreg %tdescr) {
; GCN-LABEL: image_bvh_intersect_ray_flat:		; GCN-LABEL: image_bvh_intersect_ray_flat:
; GCN: ; %bb.0:		; GCN: ; %bb.0:
; GCN-NEXT: image_bvh_intersect_ray v[0:3], v[0:15], s[0:3]		; GCN-NEXT: image_bvh_intersect_ray v[0:3], v[0:10], s[0:3]
; GCN-NEXT: s_waitcnt vmcnt(0)		; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: ; return to shader part epilog		; GCN-NEXT: ; return to shader part epilog
%ray_origin0 = insertelement <3 x float> undef, float %ray_origin_x, i32 0		%ray_origin0 = insertelement <3 x float> undef, float %ray_origin_x, i32 0
%ray_origin1 = insertelement <3 x float> %ray_origin0, float %ray_origin_y, i32 1		%ray_origin1 = insertelement <3 x float> %ray_origin0, float %ray_origin_y, i32 1
%ray_origin = insertelement <3 x float> %ray_origin1, float %ray_origin_z, i32 2		%ray_origin = insertelement <3 x float> %ray_origin1, float %ray_origin_z, i32 2
%ray_dir0 = insertelement <3 x float> undef, float %ray_dir_x, i32 0		%ray_dir0 = insertelement <3 x float> undef, float %ray_dir_x, i32 0
%ray_dir1 = insertelement <3 x float> %ray_dir0, float %ray_dir_y, i32 1		%ray_dir1 = insertelement <3 x float> %ray_dir0, float %ray_dir_y, i32 1
%ray_dir = insertelement <3 x float> %ray_dir1, float %ray_dir_z, i32 2		%ray_dir = insertelement <3 x float> %ray_dir1, float %ray_dir_z, i32 2
Show All 31 Lines	; GFX11-NEXT: ; return to shader part epilog
%v = call <4 x i32> @llvm.amdgcn.image.bvh.intersect.ray.i32.v4f16(i32 %node_ptr, float %ray_extent, <3 x float> %ray_origin, <3 x half> %ray_dir, <3 x half> %ray_inv_dir, <4 x i32> %tdescr)		%v = call <4 x i32> @llvm.amdgcn.image.bvh.intersect.ray.i32.v4f16(i32 %node_ptr, float %ray_extent, <3 x float> %ray_origin, <3 x half> %ray_dir, <3 x half> %ray_inv_dir, <4 x i32> %tdescr)
%r = bitcast <4 x i32> %v to <4 x float>		%r = bitcast <4 x i32> %v to <4 x float>
ret <4 x float> %r		ret <4 x float> %r
}		}

define amdgpu_ps <4 x float> @image_bvh64_intersect_ray(i64 %node_ptr, float %ray_extent, <3 x float> %ray_origin, <3 x float> %ray_dir, <3 x float> %ray_inv_dir, <4 x i32> inreg %tdescr) {		define amdgpu_ps <4 x float> @image_bvh64_intersect_ray(i64 %node_ptr, float %ray_extent, <3 x float> %ray_origin, <3 x float> %ray_dir, <3 x float> %ray_inv_dir, <4 x i32> inreg %tdescr) {
; GCN-LABEL: image_bvh64_intersect_ray:		; GCN-LABEL: image_bvh64_intersect_ray:
; GCN: ; %bb.0:		; GCN: ; %bb.0:
; GCN-NEXT: image_bvh64_intersect_ray v[0:3], v[0:15], s[0:3]		; GCN-NEXT: image_bvh64_intersect_ray v[0:3], v[0:11], s[0:3]
; GCN-NEXT: s_waitcnt vmcnt(0)		; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: ; return to shader part epilog		; GCN-NEXT: ; return to shader part epilog
%v = call <4 x i32> @llvm.amdgcn.image.bvh.intersect.ray.i64.v4f32(i64 %node_ptr, float %ray_extent, <3 x float> %ray_origin, <3 x float> %ray_dir, <3 x float> %ray_inv_dir, <4 x i32> %tdescr)		%v = call <4 x i32> @llvm.amdgcn.image.bvh.intersect.ray.i64.v4f32(i64 %node_ptr, float %ray_extent, <3 x float> %ray_origin, <3 x float> %ray_dir, <3 x float> %ray_inv_dir, <4 x i32> %tdescr)
%r = bitcast <4 x i32> %v to <4 x float>		%r = bitcast <4 x i32> %v to <4 x float>
ret <4 x float> %r		ret <4 x float> %r
}		}

define amdgpu_ps <4 x float> @image_bvh64_intersect_ray_flat(<2 x i32> %node_ptr_vec, float %ray_extent, float %ray_origin_x, float %ray_origin_y, float %ray_origin_z, float %ray_dir_x, float %ray_dir_y, float %ray_dir_z, float %ray_inv_dir_x, float %ray_inv_dir_y, float %ray_inv_dir_z, <4 x i32> inreg %tdescr) {		define amdgpu_ps <4 x float> @image_bvh64_intersect_ray_flat(<2 x i32> %node_ptr_vec, float %ray_extent, float %ray_origin_x, float %ray_origin_y, float %ray_origin_z, float %ray_dir_x, float %ray_dir_y, float %ray_dir_z, float %ray_inv_dir_x, float %ray_inv_dir_y, float %ray_inv_dir_z, <4 x i32> inreg %tdescr) {
; GCN-LABEL: image_bvh64_intersect_ray_flat:		; GCN-LABEL: image_bvh64_intersect_ray_flat:
; GCN: ; %bb.0:		; GCN: ; %bb.0:
; GCN-NEXT: image_bvh64_intersect_ray v[0:3], v[0:15], s[0:3]		; GCN-NEXT: image_bvh64_intersect_ray v[0:3], v[0:11], s[0:3]
; GCN-NEXT: s_waitcnt vmcnt(0)		; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: ; return to shader part epilog		; GCN-NEXT: ; return to shader part epilog
%node_ptr = bitcast <2 x i32> %node_ptr_vec to i64		%node_ptr = bitcast <2 x i32> %node_ptr_vec to i64
%ray_origin0 = insertelement <3 x float> undef, float %ray_origin_x, i32 0		%ray_origin0 = insertelement <3 x float> undef, float %ray_origin_x, i32 0
%ray_origin1 = insertelement <3 x float> %ray_origin0, float %ray_origin_y, i32 1		%ray_origin1 = insertelement <3 x float> %ray_origin0, float %ray_origin_y, i32 1
%ray_origin = insertelement <3 x float> %ray_origin1, float %ray_origin_z, i32 2		%ray_origin = insertelement <3 x float> %ray_origin1, float %ray_origin_z, i32 2
%ray_dir0 = insertelement <3 x float> undef, float %ray_dir_x, i32 0		%ray_dir0 = insertelement <3 x float> undef, float %ray_dir_x, i32 0
%ray_dir1 = insertelement <3 x float> %ray_dir0, float %ray_dir_y, i32 1		%ray_dir1 = insertelement <3 x float> %ray_dir0, float %ray_dir_y, i32 1
Show All 12 Lines
; GFX10-NEXT: v_lshrrev_b32_e32 v10, 16, v6		; GFX10-NEXT: v_lshrrev_b32_e32 v10, 16, v6
; GFX10-NEXT: v_and_b32_e32 v11, 0xffff, v8		; GFX10-NEXT: v_and_b32_e32 v11, 0xffff, v8
; GFX10-NEXT: v_and_b32_e32 v9, 0xffff, v9		; GFX10-NEXT: v_and_b32_e32 v9, 0xffff, v9
; GFX10-NEXT: v_lshlrev_b32_e32 v10, 16, v10		; GFX10-NEXT: v_lshlrev_b32_e32 v10, 16, v10
; GFX10-NEXT: v_lshlrev_b32_e32 v11, 16, v11		; GFX10-NEXT: v_lshlrev_b32_e32 v11, 16, v11
; GFX10-NEXT: v_alignbit_b32 v8, v9, v8, 16		; GFX10-NEXT: v_alignbit_b32 v8, v9, v8, 16
; GFX10-NEXT: v_and_or_b32 v6, v6, 0xffff, v10		; GFX10-NEXT: v_and_or_b32 v6, v6, 0xffff, v10
; GFX10-NEXT: v_and_or_b32 v7, v7, 0xffff, v11		; GFX10-NEXT: v_and_or_b32 v7, v7, 0xffff, v11
; GFX10-NEXT: image_bvh64_intersect_ray v[0:3], v[0:15], s[0:3] a16		; GFX10-NEXT: image_bvh64_intersect_ray v[0:3], v[0:8], s[0:3] a16
; GFX10-NEXT: s_waitcnt vmcnt(0)		; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: ; return to shader part epilog		; GFX10-NEXT: ; return to shader part epilog
;		;
; GFX11-LABEL: image_bvh64_intersect_ray_a16:		; GFX11-LABEL: image_bvh64_intersect_ray_a16:
; GFX11: ; %bb.0:		; GFX11: ; %bb.0:
; GFX11-NEXT: v_perm_b32 v10, v6, v8, 0x5040100		; GFX11-NEXT: v_perm_b32 v10, v6, v8, 0x5040100
; GFX11-NEXT: v_perm_b32 v11, v6, v8, 0x7060302		; GFX11-NEXT: v_perm_b32 v11, v6, v8, 0x7060302
; GFX11-NEXT: v_perm_b32 v12, v7, v9, 0x5040100		; GFX11-NEXT: v_perm_b32 v12, v7, v9, 0x5040100
Show All 24 Lines
; GFX1030-NEXT: v_readfirstlane_b32 s4, v11		; GFX1030-NEXT: v_readfirstlane_b32 s4, v11
; GFX1030-NEXT: v_readfirstlane_b32 s5, v12		; GFX1030-NEXT: v_readfirstlane_b32 s5, v12
; GFX1030-NEXT: v_readfirstlane_b32 s6, v13		; GFX1030-NEXT: v_readfirstlane_b32 s6, v13
; GFX1030-NEXT: v_readfirstlane_b32 s7, v14		; GFX1030-NEXT: v_readfirstlane_b32 s7, v14
; GFX1030-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[11:12]		; GFX1030-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[11:12]
; GFX1030-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[13:14]		; GFX1030-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[13:14]
; GFX1030-NEXT: s_and_b32 s0, vcc_lo, s0		; GFX1030-NEXT: s_and_b32 s0, vcc_lo, s0
; GFX1030-NEXT: s_and_saveexec_b32 s0, s0		; GFX1030-NEXT: s_and_saveexec_b32 s0, s0
; GFX1030-NEXT: image_bvh_intersect_ray v[0:3], v[15:30], s[4:7]		; GFX1030-NEXT: image_bvh_intersect_ray v[0:3], v[15:25], s[4:7]
; GFX1030-NEXT: ; implicit-def: $vgpr11		; GFX1030-NEXT: ; implicit-def: $vgpr11
; GFX1030-NEXT: ; implicit-def: $vgpr15		; GFX1030-NEXT: ; implicit-def: $vgpr15
; GFX1030-NEXT: ; implicit-def: $vgpr16		; GFX1030-NEXT: ; implicit-def: $vgpr16
; GFX1030-NEXT: ; implicit-def: $vgpr17		; GFX1030-NEXT: ; implicit-def: $vgpr17
; GFX1030-NEXT: ; implicit-def: $vgpr18		; GFX1030-NEXT: ; implicit-def: $vgpr18
; GFX1030-NEXT: ; implicit-def: $vgpr19		; GFX1030-NEXT: ; implicit-def: $vgpr19
; GFX1030-NEXT: ; implicit-def: $vgpr20		; GFX1030-NEXT: ; implicit-def: $vgpr20
; GFX1030-NEXT: ; implicit-def: $vgpr21		; GFX1030-NEXT: ; implicit-def: $vgpr21
; GFX1030-NEXT: ; implicit-def: $vgpr22		; GFX1030-NEXT: ; implicit-def: $vgpr22
; GFX1030-NEXT: ; implicit-def: $vgpr23		; GFX1030-NEXT: ; implicit-def: $vgpr23
; GFX1030-NEXT: ; implicit-def: $vgpr24		; GFX1030-NEXT: ; implicit-def: $vgpr24
; GFX1030-NEXT: ; implicit-def: $vgpr25		; GFX1030-NEXT: ; implicit-def: $vgpr25
; GFX1030-NEXT: ; implicit-def: $vgpr11_vgpr12_vgpr13_vgpr14		; GFX1030-NEXT: ; implicit-def: $vgpr11_vgpr12_vgpr13_vgpr14
; GFX1030-NEXT: s_xor_b32 exec_lo, exec_lo, s0		; GFX1030-NEXT: s_xor_b32 exec_lo, exec_lo, s0
; GFX1030-NEXT: s_cbranch_execnz .LBB6_1		; GFX1030-NEXT: s_cbranch_execnz .LBB6_1
; GFX1030-NEXT: ; %bb.2:		; GFX1030-NEXT: ; %bb.2:
; GFX1030-NEXT: s_mov_b32 exec_lo, s1		; GFX1030-NEXT: s_mov_b32 exec_lo, s1
; GFX1030-NEXT: s_waitcnt vmcnt(0)		; GFX1030-NEXT: s_waitcnt vmcnt(0)
; GFX1030-NEXT: ; return to shader part epilog		; GFX1030-NEXT: ; return to shader part epilog
;		;
; GFX1013-LABEL: image_bvh_intersect_ray_vgpr_descr:		; GFX1013-LABEL: image_bvh_intersect_ray_vgpr_descr:
; GFX1013: ; %bb.0:		; GFX1013: ; %bb.0:
; GFX1013-NEXT: v_mov_b32_e32 v16, v11
; GFX1013-NEXT: v_mov_b32_e32 v17, v12
; GFX1013-NEXT: v_mov_b32_e32 v18, v13
; GFX1013-NEXT: v_mov_b32_e32 v19, v14
; GFX1013-NEXT: s_mov_b32 s1, exec_lo		; GFX1013-NEXT: s_mov_b32 s1, exec_lo
; GFX1013-NEXT: .LBB6_1: ; =>This Inner Loop Header: Depth=1		; GFX1013-NEXT: .LBB6_1: ; =>This Inner Loop Header: Depth=1
; GFX1013-NEXT: v_readfirstlane_b32 s4, v16		; GFX1013-NEXT: v_readfirstlane_b32 s4, v11
; GFX1013-NEXT: v_readfirstlane_b32 s5, v17		; GFX1013-NEXT: v_readfirstlane_b32 s5, v12
; GFX1013-NEXT: v_readfirstlane_b32 s6, v18		; GFX1013-NEXT: v_readfirstlane_b32 s6, v13
; GFX1013-NEXT: v_readfirstlane_b32 s7, v19		; GFX1013-NEXT: v_readfirstlane_b32 s7, v14
; GFX1013-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[16:17]		; GFX1013-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[11:12]
; GFX1013-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[18:19]		; GFX1013-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[13:14]
; GFX1013-NEXT: s_and_b32 s0, vcc_lo, s0		; GFX1013-NEXT: s_and_b32 s0, vcc_lo, s0
; GFX1013-NEXT: s_and_saveexec_b32 s0, s0		; GFX1013-NEXT: s_and_saveexec_b32 s0, s0
; GFX1013-NEXT: image_bvh_intersect_ray v[20:23], v[0:15], s[4:7]		; GFX1013-NEXT: image_bvh_intersect_ray v[15:18], v[0:10], s[4:7]
; GFX1013-NEXT: ; implicit-def: $vgpr16		; GFX1013-NEXT: ; implicit-def: $vgpr11
; GFX1013-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15		; GFX1013-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10
; GFX1013-NEXT: ; implicit-def: $vgpr16_vgpr17_vgpr18_vgpr19		; GFX1013-NEXT: ; implicit-def: $vgpr11_vgpr12_vgpr13_vgpr14
; GFX1013-NEXT: s_waitcnt_depctr 0xffe3		; GFX1013-NEXT: s_waitcnt_depctr 0xffe3
; GFX1013-NEXT: s_xor_b32 exec_lo, exec_lo, s0		; GFX1013-NEXT: s_xor_b32 exec_lo, exec_lo, s0
; GFX1013-NEXT: s_cbranch_execnz .LBB6_1		; GFX1013-NEXT: s_cbranch_execnz .LBB6_1
; GFX1013-NEXT: ; %bb.2:		; GFX1013-NEXT: ; %bb.2:
; GFX1013-NEXT: s_mov_b32 exec_lo, s1		; GFX1013-NEXT: s_mov_b32 exec_lo, s1
; GFX1013-NEXT: s_waitcnt vmcnt(0)		; GFX1013-NEXT: s_waitcnt vmcnt(0)
; GFX1013-NEXT: v_mov_b32_e32 v0, v20		; GFX1013-NEXT: v_mov_b32_e32 v0, v15
; GFX1013-NEXT: v_mov_b32_e32 v1, v21		; GFX1013-NEXT: v_mov_b32_e32 v1, v16
; GFX1013-NEXT: v_mov_b32_e32 v2, v22		; GFX1013-NEXT: v_mov_b32_e32 v2, v17
; GFX1013-NEXT: v_mov_b32_e32 v3, v23		; GFX1013-NEXT: v_mov_b32_e32 v3, v18
; GFX1013-NEXT: ; return to shader part epilog		; GFX1013-NEXT: ; return to shader part epilog
;		;
; GFX11-LABEL: image_bvh_intersect_ray_vgpr_descr:		; GFX11-LABEL: image_bvh_intersect_ray_vgpr_descr:
; GFX11: ; %bb.0:		; GFX11: ; %bb.0:
; GFX11-NEXT: v_dual_mov_b32 v18, v0 :: v_dual_mov_b32 v19, v1		; GFX11-NEXT: v_dual_mov_b32 v18, v0 :: v_dual_mov_b32 v19, v1
; GFX11-NEXT: v_dual_mov_b32 v15, v2 :: v_dual_mov_b32 v16, v3		; GFX11-NEXT: v_dual_mov_b32 v15, v2 :: v_dual_mov_b32 v16, v3
; GFX11-NEXT: v_mov_b32_e32 v17, v4		; GFX11-NEXT: v_mov_b32_e32 v17, v4
; GFX11-NEXT: s_mov_b32 s1, exec_lo		; GFX11-NEXT: s_mov_b32 s1, exec_lo
▲ Show 20 Lines • Show All 165 Lines • ▼ Show 20 Lines
; GFX1030-NEXT: v_readfirstlane_b32 s4, v12		; GFX1030-NEXT: v_readfirstlane_b32 s4, v12
; GFX1030-NEXT: v_readfirstlane_b32 s5, v13		; GFX1030-NEXT: v_readfirstlane_b32 s5, v13
; GFX1030-NEXT: v_readfirstlane_b32 s6, v14		; GFX1030-NEXT: v_readfirstlane_b32 s6, v14
; GFX1030-NEXT: v_readfirstlane_b32 s7, v15		; GFX1030-NEXT: v_readfirstlane_b32 s7, v15
; GFX1030-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[12:13]		; GFX1030-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[12:13]
; GFX1030-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[14:15]		; GFX1030-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[14:15]
; GFX1030-NEXT: s_and_b32 s0, vcc_lo, s0		; GFX1030-NEXT: s_and_b32 s0, vcc_lo, s0
; GFX1030-NEXT: s_and_saveexec_b32 s0, s0		; GFX1030-NEXT: s_and_saveexec_b32 s0, s0
; GFX1030-NEXT: image_bvh64_intersect_ray v[0:3], v[16:31], s[4:7]		; GFX1030-NEXT: image_bvh64_intersect_ray v[0:3], v[16:27], s[4:7]
; GFX1030-NEXT: ; implicit-def: $vgpr12		; GFX1030-NEXT: ; implicit-def: $vgpr12
; GFX1030-NEXT: ; implicit-def: $vgpr16		; GFX1030-NEXT: ; implicit-def: $vgpr16
; GFX1030-NEXT: ; implicit-def: $vgpr17		; GFX1030-NEXT: ; implicit-def: $vgpr17
; GFX1030-NEXT: ; implicit-def: $vgpr18		; GFX1030-NEXT: ; implicit-def: $vgpr18
; GFX1030-NEXT: ; implicit-def: $vgpr19		; GFX1030-NEXT: ; implicit-def: $vgpr19
; GFX1030-NEXT: ; implicit-def: $vgpr20		; GFX1030-NEXT: ; implicit-def: $vgpr20
; GFX1030-NEXT: ; implicit-def: $vgpr21		; GFX1030-NEXT: ; implicit-def: $vgpr21
; GFX1030-NEXT: ; implicit-def: $vgpr22		; GFX1030-NEXT: ; implicit-def: $vgpr22
; GFX1030-NEXT: ; implicit-def: $vgpr23		; GFX1030-NEXT: ; implicit-def: $vgpr23
; GFX1030-NEXT: ; implicit-def: $vgpr24		; GFX1030-NEXT: ; implicit-def: $vgpr24
; GFX1030-NEXT: ; implicit-def: $vgpr25		; GFX1030-NEXT: ; implicit-def: $vgpr25
; GFX1030-NEXT: ; implicit-def: $vgpr26		; GFX1030-NEXT: ; implicit-def: $vgpr26
; GFX1030-NEXT: ; implicit-def: $vgpr27		; GFX1030-NEXT: ; implicit-def: $vgpr27
; GFX1030-NEXT: ; implicit-def: $vgpr12_vgpr13_vgpr14_vgpr15		; GFX1030-NEXT: ; implicit-def: $vgpr12_vgpr13_vgpr14_vgpr15
; GFX1030-NEXT: s_xor_b32 exec_lo, exec_lo, s0		; GFX1030-NEXT: s_xor_b32 exec_lo, exec_lo, s0
; GFX1030-NEXT: s_cbranch_execnz .LBB8_1		; GFX1030-NEXT: s_cbranch_execnz .LBB8_1
; GFX1030-NEXT: ; %bb.2:		; GFX1030-NEXT: ; %bb.2:
; GFX1030-NEXT: s_mov_b32 exec_lo, s1		; GFX1030-NEXT: s_mov_b32 exec_lo, s1
; GFX1030-NEXT: s_waitcnt vmcnt(0)		; GFX1030-NEXT: s_waitcnt vmcnt(0)
; GFX1030-NEXT: ; return to shader part epilog		; GFX1030-NEXT: ; return to shader part epilog
;		;
; GFX1013-LABEL: image_bvh64_intersect_ray_vgpr_descr:		; GFX1013-LABEL: image_bvh64_intersect_ray_vgpr_descr:
; GFX1013: ; %bb.0:		; GFX1013: ; %bb.0:
; GFX1013-NEXT: v_mov_b32_e32 v16, v12
; GFX1013-NEXT: v_mov_b32_e32 v17, v13
; GFX1013-NEXT: v_mov_b32_e32 v18, v14
; GFX1013-NEXT: v_mov_b32_e32 v19, v15
; GFX1013-NEXT: s_mov_b32 s1, exec_lo		; GFX1013-NEXT: s_mov_b32 s1, exec_lo
; GFX1013-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1		; GFX1013-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
; GFX1013-NEXT: v_readfirstlane_b32 s4, v16		; GFX1013-NEXT: v_readfirstlane_b32 s4, v12
; GFX1013-NEXT: v_readfirstlane_b32 s5, v17		; GFX1013-NEXT: v_readfirstlane_b32 s5, v13
; GFX1013-NEXT: v_readfirstlane_b32 s6, v18		; GFX1013-NEXT: v_readfirstlane_b32 s6, v14
; GFX1013-NEXT: v_readfirstlane_b32 s7, v19		; GFX1013-NEXT: v_readfirstlane_b32 s7, v15
; GFX1013-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[16:17]		; GFX1013-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[12:13]
; GFX1013-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[18:19]		; GFX1013-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[14:15]
; GFX1013-NEXT: s_and_b32 s0, vcc_lo, s0		; GFX1013-NEXT: s_and_b32 s0, vcc_lo, s0
; GFX1013-NEXT: s_and_saveexec_b32 s0, s0		; GFX1013-NEXT: s_and_saveexec_b32 s0, s0
; GFX1013-NEXT: image_bvh64_intersect_ray v[20:23], v[0:15], s[4:7]		; GFX1013-NEXT: image_bvh64_intersect_ray v[16:19], v[0:11], s[4:7]
; GFX1013-NEXT: ; implicit-def: $vgpr16		; GFX1013-NEXT: ; implicit-def: $vgpr12
; GFX1013-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15		; GFX1013-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11
; GFX1013-NEXT: ; implicit-def: $vgpr16_vgpr17_vgpr18_vgpr19		; GFX1013-NEXT: ; implicit-def: $vgpr12_vgpr13_vgpr14_vgpr15
; GFX1013-NEXT: s_waitcnt_depctr 0xffe3		; GFX1013-NEXT: s_waitcnt_depctr 0xffe3
; GFX1013-NEXT: s_xor_b32 exec_lo, exec_lo, s0		; GFX1013-NEXT: s_xor_b32 exec_lo, exec_lo, s0
; GFX1013-NEXT: s_cbranch_execnz .LBB8_1		; GFX1013-NEXT: s_cbranch_execnz .LBB8_1
; GFX1013-NEXT: ; %bb.2:		; GFX1013-NEXT: ; %bb.2:
; GFX1013-NEXT: s_mov_b32 exec_lo, s1		; GFX1013-NEXT: s_mov_b32 exec_lo, s1
; GFX1013-NEXT: s_waitcnt vmcnt(0)		; GFX1013-NEXT: s_waitcnt vmcnt(0)
; GFX1013-NEXT: v_mov_b32_e32 v0, v20		; GFX1013-NEXT: v_mov_b32_e32 v0, v16
; GFX1013-NEXT: v_mov_b32_e32 v1, v21		; GFX1013-NEXT: v_mov_b32_e32 v1, v17
; GFX1013-NEXT: v_mov_b32_e32 v2, v22		; GFX1013-NEXT: v_mov_b32_e32 v2, v18
; GFX1013-NEXT: v_mov_b32_e32 v3, v23		; GFX1013-NEXT: v_mov_b32_e32 v3, v19
; GFX1013-NEXT: ; return to shader part epilog		; GFX1013-NEXT: ; return to shader part epilog
;		;
; GFX11-LABEL: image_bvh64_intersect_ray_vgpr_descr:		; GFX11-LABEL: image_bvh64_intersect_ray_vgpr_descr:
; GFX11: ; %bb.0:		; GFX11: ; %bb.0:
; GFX11-NEXT: v_dual_mov_b32 v19, v0 :: v_dual_mov_b32 v20, v1		; GFX11-NEXT: v_dual_mov_b32 v19, v0 :: v_dual_mov_b32 v20, v1
; GFX11-NEXT: v_dual_mov_b32 v21, v2 :: v_dual_mov_b32 v16, v3		; GFX11-NEXT: v_dual_mov_b32 v21, v2 :: v_dual_mov_b32 v16, v3
; GFX11-NEXT: v_dual_mov_b32 v17, v4 :: v_dual_mov_b32 v18, v5		; GFX11-NEXT: v_dual_mov_b32 v17, v4 :: v_dual_mov_b32 v18, v5
; GFX11-NEXT: s_mov_b32 s1, exec_lo		; GFX11-NEXT: s_mov_b32 s1, exec_lo
▲ Show 20 Lines • Show All 49 Lines • ▼ Show 20 Lines
; GFX1030-NEXT: v_readfirstlane_b32 s4, v10		; GFX1030-NEXT: v_readfirstlane_b32 s4, v10
; GFX1030-NEXT: v_readfirstlane_b32 s5, v11		; GFX1030-NEXT: v_readfirstlane_b32 s5, v11
; GFX1030-NEXT: v_readfirstlane_b32 s6, v12		; GFX1030-NEXT: v_readfirstlane_b32 s6, v12
; GFX1030-NEXT: v_readfirstlane_b32 s7, v13		; GFX1030-NEXT: v_readfirstlane_b32 s7, v13
; GFX1030-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[10:11]		; GFX1030-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[10:11]
; GFX1030-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[12:13]		; GFX1030-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[12:13]
; GFX1030-NEXT: s_and_b32 s0, vcc_lo, s0		; GFX1030-NEXT: s_and_b32 s0, vcc_lo, s0
; GFX1030-NEXT: s_and_saveexec_b32 s0, s0		; GFX1030-NEXT: s_and_saveexec_b32 s0, s0
; GFX1030-NEXT: image_bvh64_intersect_ray v[0:3], v[14:29], s[4:7] a16		; GFX1030-NEXT: image_bvh64_intersect_ray v[0:3], v[14:22], s[4:7] a16
; GFX1030-NEXT: ; implicit-def: $vgpr10		; GFX1030-NEXT: ; implicit-def: $vgpr10
; GFX1030-NEXT: ; implicit-def: $vgpr14		; GFX1030-NEXT: ; implicit-def: $vgpr14
; GFX1030-NEXT: ; implicit-def: $vgpr15		; GFX1030-NEXT: ; implicit-def: $vgpr15
; GFX1030-NEXT: ; implicit-def: $vgpr16		; GFX1030-NEXT: ; implicit-def: $vgpr16
; GFX1030-NEXT: ; implicit-def: $vgpr17		; GFX1030-NEXT: ; implicit-def: $vgpr17
; GFX1030-NEXT: ; implicit-def: $vgpr18		; GFX1030-NEXT: ; implicit-def: $vgpr18
; GFX1030-NEXT: ; implicit-def: $vgpr19		; GFX1030-NEXT: ; implicit-def: $vgpr19
; GFX1030-NEXT: ; implicit-def: $vgpr20		; GFX1030-NEXT: ; implicit-def: $vgpr20
; GFX1030-NEXT: ; implicit-def: $vgpr21		; GFX1030-NEXT: ; implicit-def: $vgpr21
; GFX1030-NEXT: ; implicit-def: $vgpr22		; GFX1030-NEXT: ; implicit-def: $vgpr22
; GFX1030-NEXT: ; implicit-def: $vgpr10_vgpr11_vgpr12_vgpr13		; GFX1030-NEXT: ; implicit-def: $vgpr10_vgpr11_vgpr12_vgpr13
; GFX1030-NEXT: s_xor_b32 exec_lo, exec_lo, s0		; GFX1030-NEXT: s_xor_b32 exec_lo, exec_lo, s0
; GFX1030-NEXT: s_cbranch_execnz .LBB9_1		; GFX1030-NEXT: s_cbranch_execnz .LBB9_1
; GFX1030-NEXT: ; %bb.2:		; GFX1030-NEXT: ; %bb.2:
; GFX1030-NEXT: s_mov_b32 exec_lo, s1		; GFX1030-NEXT: s_mov_b32 exec_lo, s1
; GFX1030-NEXT: s_waitcnt vmcnt(0)		; GFX1030-NEXT: s_waitcnt vmcnt(0)
; GFX1030-NEXT: ; return to shader part epilog		; GFX1030-NEXT: ; return to shader part epilog
;		;
; GFX1013-LABEL: image_bvh64_intersect_ray_a16_vgpr_descr:		; GFX1013-LABEL: image_bvh64_intersect_ray_a16_vgpr_descr:
; GFX1013: ; %bb.0:		; GFX1013: ; %bb.0:
; GFX1013-NEXT: v_mov_b32_e32 v16, v10		; GFX1013-NEXT: v_lshrrev_b32_e32 v14, 16, v6
; GFX1013-NEXT: v_mov_b32_e32 v17, v11		; GFX1013-NEXT: v_and_b32_e32 v15, 0xffff, v8
; GFX1013-NEXT: v_lshrrev_b32_e32 v10, 16, v6
; GFX1013-NEXT: v_and_b32_e32 v11, 0xffff, v8
; GFX1013-NEXT: v_and_b32_e32 v9, 0xffff, v9		; GFX1013-NEXT: v_and_b32_e32 v9, 0xffff, v9
; GFX1013-NEXT: v_mov_b32_e32 v18, v12
; GFX1013-NEXT: v_mov_b32_e32 v19, v13
; GFX1013-NEXT: v_lshlrev_b32_e32 v10, 16, v10
; GFX1013-NEXT: v_lshlrev_b32_e32 v11, 16, v11
; GFX1013-NEXT: v_alignbit_b32 v8, v9, v8, 16
; GFX1013-NEXT: s_mov_b32 s1, exec_lo		; GFX1013-NEXT: s_mov_b32 s1, exec_lo
; GFX1013-NEXT: v_and_or_b32 v6, v6, 0xffff, v10		; GFX1013-NEXT: v_lshlrev_b32_e32 v14, 16, v14
; GFX1013-NEXT: v_and_or_b32 v7, v7, 0xffff, v11		; GFX1013-NEXT: v_lshlrev_b32_e32 v15, 16, v15
		; GFX1013-NEXT: v_alignbit_b32 v8, v9, v8, 16
		; GFX1013-NEXT: v_and_or_b32 v6, v6, 0xffff, v14
		; GFX1013-NEXT: v_and_or_b32 v7, v7, 0xffff, v15
; GFX1013-NEXT: .LBB9_1: ; =>This Inner Loop Header: Depth=1		; GFX1013-NEXT: .LBB9_1: ; =>This Inner Loop Header: Depth=1
; GFX1013-NEXT: v_readfirstlane_b32 s4, v16		; GFX1013-NEXT: v_readfirstlane_b32 s4, v10
; GFX1013-NEXT: v_readfirstlane_b32 s5, v17		; GFX1013-NEXT: v_readfirstlane_b32 s5, v11
; GFX1013-NEXT: v_readfirstlane_b32 s6, v18		; GFX1013-NEXT: v_readfirstlane_b32 s6, v12
; GFX1013-NEXT: v_readfirstlane_b32 s7, v19		; GFX1013-NEXT: v_readfirstlane_b32 s7, v13
; GFX1013-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[16:17]		; GFX1013-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[10:11]
; GFX1013-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[18:19]		; GFX1013-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[12:13]
; GFX1013-NEXT: s_and_b32 s0, vcc_lo, s0		; GFX1013-NEXT: s_and_b32 s0, vcc_lo, s0
; GFX1013-NEXT: s_and_saveexec_b32 s0, s0		; GFX1013-NEXT: s_and_saveexec_b32 s0, s0
; GFX1013-NEXT: image_bvh64_intersect_ray v[20:23], v[0:15], s[4:7] a16		; GFX1013-NEXT: image_bvh64_intersect_ray v[14:17], v[0:8], s[4:7] a16
; GFX1013-NEXT: ; implicit-def: $vgpr16		; GFX1013-NEXT: ; implicit-def: $vgpr10
; GFX1013-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15		; GFX1013-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8
; GFX1013-NEXT: ; implicit-def: $vgpr16_vgpr17_vgpr18_vgpr19		; GFX1013-NEXT: ; implicit-def: $vgpr10_vgpr11_vgpr12_vgpr13
; GFX1013-NEXT: s_waitcnt_depctr 0xffe3		; GFX1013-NEXT: s_waitcnt_depctr 0xffe3
; GFX1013-NEXT: s_xor_b32 exec_lo, exec_lo, s0		; GFX1013-NEXT: s_xor_b32 exec_lo, exec_lo, s0
; GFX1013-NEXT: s_cbranch_execnz .LBB9_1		; GFX1013-NEXT: s_cbranch_execnz .LBB9_1
; GFX1013-NEXT: ; %bb.2:		; GFX1013-NEXT: ; %bb.2:
; GFX1013-NEXT: s_mov_b32 exec_lo, s1		; GFX1013-NEXT: s_mov_b32 exec_lo, s1
; GFX1013-NEXT: s_waitcnt vmcnt(0)		; GFX1013-NEXT: s_waitcnt vmcnt(0)
; GFX1013-NEXT: v_mov_b32_e32 v0, v20		; GFX1013-NEXT: v_mov_b32_e32 v0, v14
; GFX1013-NEXT: v_mov_b32_e32 v1, v21		; GFX1013-NEXT: v_mov_b32_e32 v1, v15
; GFX1013-NEXT: v_mov_b32_e32 v2, v22		; GFX1013-NEXT: v_mov_b32_e32 v2, v16
; GFX1013-NEXT: v_mov_b32_e32 v3, v23		; GFX1013-NEXT: v_mov_b32_e32 v3, v17
; GFX1013-NEXT: ; return to shader part epilog		; GFX1013-NEXT: ; return to shader part epilog
;		;
; GFX11-LABEL: image_bvh64_intersect_ray_a16_vgpr_descr:		; GFX11-LABEL: image_bvh64_intersect_ray_a16_vgpr_descr:
; GFX11: ; %bb.0:		; GFX11: ; %bb.0:
; GFX11-NEXT: v_dual_mov_b32 v17, v0 :: v_dual_mov_b32 v18, v1		; GFX11-NEXT: v_dual_mov_b32 v17, v0 :: v_dual_mov_b32 v18, v1
; GFX11-NEXT: v_dual_mov_b32 v19, v2 :: v_dual_mov_b32 v14, v3		; GFX11-NEXT: v_dual_mov_b32 v19, v2 :: v_dual_mov_b32 v14, v3
; GFX11-NEXT: v_dual_mov_b32 v15, v4 :: v_dual_mov_b32 v16, v5		; GFX11-NEXT: v_dual_mov_b32 v15, v4 :: v_dual_mov_b32 v16, v5
; GFX11-NEXT: v_perm_b32 v4, v6, v8, 0x5040100		; GFX11-NEXT: v_perm_b32 v4, v6, v8, 0x5040100
▲ Show 20 Lines • Show All 50 Lines • ▼ Show 20 Lines
; GFX1030-NEXT: v_add_co_u32 v2, vcc_lo, v2, v4		; GFX1030-NEXT: v_add_co_u32 v2, vcc_lo, v2, v4
; GFX1030-NEXT: v_add_co_ci_u32_e32 v3, vcc_lo, 0, v3, vcc_lo		; GFX1030-NEXT: v_add_co_ci_u32_e32 v3, vcc_lo, 0, v3, vcc_lo
; GFX1030-NEXT: v_mov_b32_e32 v4, 2.0		; GFX1030-NEXT: v_mov_b32_e32 v4, 2.0
; GFX1030-NEXT: flat_load_dword v0, v[0:1]		; GFX1030-NEXT: flat_load_dword v0, v[0:1]
; GFX1030-NEXT: flat_load_dword v1, v[2:3]		; GFX1030-NEXT: flat_load_dword v1, v[2:3]
; GFX1030-NEXT: v_mov_b32_e32 v2, 0		; GFX1030-NEXT: v_mov_b32_e32 v2, 0
; GFX1030-NEXT: v_mov_b32_e32 v3, 1.0		; GFX1030-NEXT: v_mov_b32_e32 v3, 1.0
; GFX1030-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)		; GFX1030-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX1030-NEXT: image_bvh_intersect_ray v[0:3], v[0:15], s[4:7]		; GFX1030-NEXT: image_bvh_intersect_ray v[0:3], v[0:10], s[4:7]
; GFX1030-NEXT: s_waitcnt vmcnt(0)		; GFX1030-NEXT: s_waitcnt vmcnt(0)
; GFX1030-NEXT: flat_store_dwordx4 v[0:1], v[0:3]		; GFX1030-NEXT: flat_store_dwordx4 v[0:1], v[0:3]
; GFX1030-NEXT: s_endpgm		; GFX1030-NEXT: s_endpgm
;		;
; GFX1013-LABEL: image_bvh_intersect_ray_nsa_reassign:		; GFX1013-LABEL: image_bvh_intersect_ray_nsa_reassign:
; GFX1013: ; %bb.0:		; GFX1013: ; %bb.0:
; GFX1013-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x24		; GFX1013-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x24
; GFX1013-NEXT: v_lshlrev_b32_e32 v6, 2, v0		; GFX1013-NEXT: v_lshlrev_b32_e32 v6, 2, v0
Show All 13 Lines
; GFX1013-NEXT: v_mov_b32_e32 v6, 4.0		; GFX1013-NEXT: v_mov_b32_e32 v6, 4.0
; GFX1013-NEXT: flat_load_dword v0, v[4:5]		; GFX1013-NEXT: flat_load_dword v0, v[4:5]
; GFX1013-NEXT: flat_load_dword v1, v[2:3]		; GFX1013-NEXT: flat_load_dword v1, v[2:3]
; GFX1013-NEXT: v_mov_b32_e32 v2, 0		; GFX1013-NEXT: v_mov_b32_e32 v2, 0
; GFX1013-NEXT: v_mov_b32_e32 v3, 1.0		; GFX1013-NEXT: v_mov_b32_e32 v3, 1.0
; GFX1013-NEXT: v_mov_b32_e32 v4, 2.0		; GFX1013-NEXT: v_mov_b32_e32 v4, 2.0
; GFX1013-NEXT: v_mov_b32_e32 v5, 0x40400000		; GFX1013-NEXT: v_mov_b32_e32 v5, 0x40400000
; GFX1013-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)		; GFX1013-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX1013-NEXT: image_bvh_intersect_ray v[0:3], v[0:15], s[4:7]		; GFX1013-NEXT: image_bvh_intersect_ray v[0:3], v[0:10], s[4:7]
; GFX1013-NEXT: s_waitcnt vmcnt(0)		; GFX1013-NEXT: s_waitcnt vmcnt(0)
; GFX1013-NEXT: flat_store_dwordx4 v[0:1], v[0:3]		; GFX1013-NEXT: flat_store_dwordx4 v[0:1], v[0:3]
; GFX1013-NEXT: s_endpgm		; GFX1013-NEXT: s_endpgm
;		;
; GFX11-LABEL: image_bvh_intersect_ray_nsa_reassign:		; GFX11-LABEL: image_bvh_intersect_ray_nsa_reassign:
; GFX11: ; %bb.0:		; GFX11: ; %bb.0:
; GFX11-NEXT: s_load_b256 s[0:7], s[0:1], 0x24		; GFX11-NEXT: s_load_b256 s[0:7], s[0:1], 0x24
; GFX11-NEXT: v_lshlrev_b32_e32 v4, 2, v0		; GFX11-NEXT: v_lshlrev_b32_e32 v4, 2, v0
▲ Show 20 Lines • Show All 207 Lines • ▼ Show 20 Lines
; GFX1030-NEXT: v_mov_b32_e32 v0, s4		; GFX1030-NEXT: v_mov_b32_e32 v0, s4
; GFX1030-NEXT: v_mov_b32_e32 v1, s5		; GFX1030-NEXT: v_mov_b32_e32 v1, s5
; GFX1030-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2		; GFX1030-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX1030-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo		; GFX1030-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
; GFX1030-NEXT: flat_load_dword v2, v[0:1]		; GFX1030-NEXT: flat_load_dword v2, v[0:1]
; GFX1030-NEXT: v_mov_b32_e32 v0, 0xb36211c7		; GFX1030-NEXT: v_mov_b32_e32 v0, 0xb36211c7
; GFX1030-NEXT: v_mov_b32_e32 v1, 0x102		; GFX1030-NEXT: v_mov_b32_e32 v1, 0x102
; GFX1030-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)		; GFX1030-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX1030-NEXT: image_bvh64_intersect_ray v[0:3], v[0:15], s[0:3]		; GFX1030-NEXT: image_bvh64_intersect_ray v[0:3], v[0:11], s[0:3]
; GFX1030-NEXT: s_waitcnt vmcnt(0)		; GFX1030-NEXT: s_waitcnt vmcnt(0)
; GFX1030-NEXT: flat_store_dwordx4 v[0:1], v[0:3]		; GFX1030-NEXT: flat_store_dwordx4 v[0:1], v[0:3]
; GFX1030-NEXT: s_endpgm		; GFX1030-NEXT: s_endpgm
;		;
; GFX1013-LABEL: image_bvh64_intersect_ray_nsa_reassign:		; GFX1013-LABEL: image_bvh64_intersect_ray_nsa_reassign:
; GFX1013: ; %bb.0:		; GFX1013: ; %bb.0:
; GFX1013-NEXT: s_clause 0x1		; GFX1013-NEXT: s_clause 0x1
; GFX1013-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24		; GFX1013-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
Show All 12 Lines
; GFX1013-NEXT: v_mov_b32_e32 v0, s2		; GFX1013-NEXT: v_mov_b32_e32 v0, s2
; GFX1013-NEXT: v_mov_b32_e32 v1, s3		; GFX1013-NEXT: v_mov_b32_e32 v1, s3
; GFX1013-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2		; GFX1013-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX1013-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo		; GFX1013-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
; GFX1013-NEXT: flat_load_dword v2, v[0:1]		; GFX1013-NEXT: flat_load_dword v2, v[0:1]
; GFX1013-NEXT: v_mov_b32_e32 v0, 0xb36211c7		; GFX1013-NEXT: v_mov_b32_e32 v0, 0xb36211c7
; GFX1013-NEXT: v_mov_b32_e32 v1, 0x102		; GFX1013-NEXT: v_mov_b32_e32 v1, 0x102
; GFX1013-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)		; GFX1013-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX1013-NEXT: image_bvh64_intersect_ray v[0:3], v[0:15], s[4:7]		; GFX1013-NEXT: image_bvh64_intersect_ray v[0:3], v[0:11], s[4:7]
; GFX1013-NEXT: s_waitcnt vmcnt(0)		; GFX1013-NEXT: s_waitcnt vmcnt(0)
; GFX1013-NEXT: flat_store_dwordx4 v[0:1], v[0:3]		; GFX1013-NEXT: flat_store_dwordx4 v[0:1], v[0:3]
; GFX1013-NEXT: s_endpgm		; GFX1013-NEXT: s_endpgm
;		;
; GFX11-LABEL: image_bvh64_intersect_ray_nsa_reassign:		; GFX11-LABEL: image_bvh64_intersect_ray_nsa_reassign:
; GFX11: ; %bb.0:		; GFX11: ; %bb.0:
; GFX11-NEXT: s_clause 0x1		; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: s_load_b64 s[4:5], s[0:1], 0x24		; GFX11-NEXT: s_load_b64 s[4:5], s[0:1], 0x24
▲ Show 20 Lines • Show All 81 Lines • ▼ Show 20 Lines
; GFX1030-NEXT: s_lshl_b32 s6, s6, 16		; GFX1030-NEXT: s_lshl_b32 s6, s6, 16
; GFX1030-NEXT: v_mov_b32_e32 v1, 0x102		; GFX1030-NEXT: v_mov_b32_e32 v1, 0x102
; GFX1030-NEXT: s_or_b32 s4, s4, s6		; GFX1030-NEXT: s_or_b32 s4, s4, s6
; GFX1030-NEXT: s_or_b32 s6, s8, s7		; GFX1030-NEXT: s_or_b32 s6, s8, s7
; GFX1030-NEXT: v_mov_b32_e32 v6, s5		; GFX1030-NEXT: v_mov_b32_e32 v6, s5
; GFX1030-NEXT: v_mov_b32_e32 v7, s4		; GFX1030-NEXT: v_mov_b32_e32 v7, s4
; GFX1030-NEXT: v_mov_b32_e32 v8, s6		; GFX1030-NEXT: v_mov_b32_e32 v8, s6
; GFX1030-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)		; GFX1030-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX1030-NEXT: image_bvh64_intersect_ray v[0:3], v[0:15], s[0:3] a16		; GFX1030-NEXT: image_bvh64_intersect_ray v[0:3], v[0:8], s[0:3] a16
; GFX1030-NEXT: s_waitcnt vmcnt(0)		; GFX1030-NEXT: s_waitcnt vmcnt(0)
; GFX1030-NEXT: flat_store_dwordx4 v[0:1], v[0:3]		; GFX1030-NEXT: flat_store_dwordx4 v[0:1], v[0:3]
; GFX1030-NEXT: s_endpgm		; GFX1030-NEXT: s_endpgm
;		;
; GFX1013-LABEL: image_bvh64_intersect_ray_a16_nsa_reassign:		; GFX1013-LABEL: image_bvh64_intersect_ray_a16_nsa_reassign:
; GFX1013: ; %bb.0:		; GFX1013: ; %bb.0:
; GFX1013-NEXT: s_clause 0x1		; GFX1013-NEXT: s_clause 0x1
; GFX1013-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24		; GFX1013-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
Show All 27 Lines
; GFX1013-NEXT: s_or_b32 s0, s0, s2		; GFX1013-NEXT: s_or_b32 s0, s0, s2
; GFX1013-NEXT: s_or_b32 s2, s8, s3		; GFX1013-NEXT: s_or_b32 s2, s8, s3
; GFX1013-NEXT: v_mov_b32_e32 v0, 0xb36211c6		; GFX1013-NEXT: v_mov_b32_e32 v0, 0xb36211c6
; GFX1013-NEXT: v_mov_b32_e32 v1, 0x102		; GFX1013-NEXT: v_mov_b32_e32 v1, 0x102
; GFX1013-NEXT: v_mov_b32_e32 v6, s1		; GFX1013-NEXT: v_mov_b32_e32 v6, s1
; GFX1013-NEXT: v_mov_b32_e32 v7, s0		; GFX1013-NEXT: v_mov_b32_e32 v7, s0
; GFX1013-NEXT: v_mov_b32_e32 v8, s2		; GFX1013-NEXT: v_mov_b32_e32 v8, s2
; GFX1013-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)		; GFX1013-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX1013-NEXT: image_bvh64_intersect_ray v[0:3], v[0:15], s[4:7] a16		; GFX1013-NEXT: image_bvh64_intersect_ray v[0:3], v[0:8], s[4:7] a16
; GFX1013-NEXT: s_waitcnt vmcnt(0)		; GFX1013-NEXT: s_waitcnt vmcnt(0)
; GFX1013-NEXT: flat_store_dwordx4 v[0:1], v[0:3]		; GFX1013-NEXT: flat_store_dwordx4 v[0:1], v[0:3]
; GFX1013-NEXT: s_endpgm		; GFX1013-NEXT: s_endpgm
;		;
; GFX11-LABEL: image_bvh64_intersect_ray_a16_nsa_reassign:		; GFX11-LABEL: image_bvh64_intersect_ray_a16_nsa_reassign:
; GFX11: ; %bb.0:		; GFX11: ; %bb.0:
; GFX11-NEXT: s_clause 0x1		; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: s_load_b64 s[4:5], s[0:1], 0x24		; GFX11-NEXT: s_load_b64 s[4:5], s[0:1], 0x24
▲ Show 20 Lines • Show All 44 Lines • Show Last 20 Lines

llvm/test/CodeGen/AMDGPU/coalescer-subreg-join.mir

# RUN: llc -march=amdgcn -run-pass simple-register-coalescing -o - %s \| FileCheck %s		# RUN: llc -march=amdgcn -run-pass simple-register-coalescing -o - %s \| FileCheck %s
# Check that %11 and %20 have been coalesced.		# Check that %11 and %20 have been coalesced.
# CHECK: IMAGE_SAMPLE_C_D_O_V1_V16 %[[REG:[0-9]+]]		# CHECK: IMAGE_SAMPLE_C_D_O_V1_V11 %[[REG:[0-9]+]]
# CHECK: IMAGE_SAMPLE_C_D_O_V1_V16 %[[REG]]		# CHECK: IMAGE_SAMPLE_C_D_O_V1_V11 %[[REG]]

---		---
name: main		name: main
alignment: 1		alignment: 1
tracksRegLiveness: true		tracksRegLiveness: true
registers:		registers:
- { id: 0, class: sreg_64 }		- { id: 0, class: sreg_64 }
- { id: 1, class: vgpr_32 }		- { id: 1, class: vgpr_32 }
- { id: 2, class: vgpr_32 }		- { id: 2, class: vgpr_32 }
- { id: 3, class: sgpr_256 }		- { id: 3, class: sgpr_256 }
- { id: 4, class: sgpr_128 }		- { id: 4, class: sgpr_128 }
- { id: 5, class: sgpr_256 }		- { id: 5, class: sgpr_256 }
- { id: 6, class: sgpr_128 }		- { id: 6, class: sgpr_128 }
- { id: 7, class: sgpr_512 }		- { id: 7, class: sgpr_512 }
- { id: 9, class: vreg_512 }		- { id: 9, class: vreg_512 }
- { id: 11, class: vreg_512 }		- { id: 11, class: vreg_352 }
- { id: 18, class: vgpr_32 }		- { id: 18, class: vgpr_32 }
- { id: 20, class: vreg_512 }		- { id: 20, class: vreg_352 }
- { id: 27, class: vgpr_32 }		- { id: 27, class: vgpr_32 }
liveins:		liveins:
- { reg: '$sgpr2_sgpr3', virtual-reg: '%0' }		- { reg: '$sgpr2_sgpr3', virtual-reg: '%0' }
- { reg: '$vgpr2', virtual-reg: '%1' }		- { reg: '$vgpr2', virtual-reg: '%1' }
- { reg: '$vgpr3', virtual-reg: '%2' }		- { reg: '$vgpr3', virtual-reg: '%2' }
frameInfo:		frameInfo:
isFrameAddressTaken: false		isFrameAddressTaken: false
isReturnAddressTaken: false		isReturnAddressTaken: false
Show All 25 Lines	bb.0:
%11.sub1 = COPY %1		%11.sub1 = COPY %1
%11.sub2 = COPY %1		%11.sub2 = COPY %1
%11.sub3 = COPY %1		%11.sub3 = COPY %1
%11.sub4 = COPY %1		%11.sub4 = COPY %1
%11.sub5 = COPY %1		%11.sub5 = COPY %1
%11.sub6 = COPY %1		%11.sub6 = COPY %1
%11.sub7 = COPY %1		%11.sub7 = COPY %1
%11.sub8 = COPY %1		%11.sub8 = COPY %1
dead %18 = IMAGE_SAMPLE_C_D_O_V1_V16 %11, %3, %4, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (load (s32))		dead %18 = IMAGE_SAMPLE_C_D_O_V1_V11 %11, %3, %4, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (load (s32))
%20.sub1 = COPY %2		%20.sub1 = COPY %2
%20.sub2 = COPY %2		%20.sub2 = COPY %2
%20.sub3 = COPY %2		%20.sub3 = COPY %2
%20.sub4 = COPY %2		%20.sub4 = COPY %2
%20.sub5 = COPY %2		%20.sub5 = COPY %2
%20.sub6 = COPY %2		%20.sub6 = COPY %2
%20.sub7 = COPY %2		%20.sub7 = COPY %2
%20.sub8 = COPY %2		%20.sub8 = COPY %2
dead %27 = IMAGE_SAMPLE_C_D_O_V1_V16 %20, %5, %6, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (load (s32))		dead %27 = IMAGE_SAMPLE_C_D_O_V1_V11 %20, %5, %6, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (load (s32))

...		...

llvm/test/CodeGen/AMDGPU/copy-illegal-type.ll

	Show First 20 Lines • Show All 165 Lines • ▼ Show 20 Lines
	; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x11			; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x11
	; SI-NEXT: s_mov_b32 s11, 0xf000			; SI-NEXT: s_mov_b32 s11, 0xf000
	; SI-NEXT: s_mov_b32 s6, 0			; SI-NEXT: s_mov_b32 s6, 0
	; SI-NEXT: s_mov_b32 s7, s11			; SI-NEXT: s_mov_b32 s7, s11
	; SI-NEXT: v_lshlrev_b32_e32 v0, 2, v0			; SI-NEXT: v_lshlrev_b32_e32 v0, 2, v0
	; SI-NEXT: v_mov_b32_e32 v1, 0			; SI-NEXT: v_mov_b32_e32 v1, 0
	; SI-NEXT: s_waitcnt lgkmcnt(0)			; SI-NEXT: s_waitcnt lgkmcnt(0)
	; SI-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64			; SI-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64
				; SI-NEXT: s_mov_b32 s24, SCRATCH_RSRC_DWORD0
				; SI-NEXT: s_mov_b32 s25, SCRATCH_RSRC_DWORD1
				; SI-NEXT: s_mov_b32 s26, -1
				; SI-NEXT: s_mov_b32 s27, 0xe8f000
				; SI-NEXT: s_add_u32 s24, s24, s3
	; SI-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x9			; SI-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x9
	; SI-NEXT: s_mov_b32 s10, -1			; SI-NEXT: s_mov_b32 s10, -1
				; SI-NEXT: s_addc_u32 s25, s25, 0
	; SI-NEXT: s_mov_b32 s14, s10			; SI-NEXT: s_mov_b32 s14, s10
	; SI-NEXT: s_mov_b32 s15, s11			; SI-NEXT: s_mov_b32 s15, s11
	; SI-NEXT: s_mov_b32 s18, s10
	; SI-NEXT: s_waitcnt lgkmcnt(0)			; SI-NEXT: s_waitcnt lgkmcnt(0)
	; SI-NEXT: s_mov_b32 s8, s0			; SI-NEXT: s_mov_b32 s8, s0
	; SI-NEXT: s_mov_b32 s9, s1			; SI-NEXT: s_mov_b32 s9, s1
				; SI-NEXT: s_mov_b32 s18, s10
	; SI-NEXT: s_mov_b32 s19, s11			; SI-NEXT: s_mov_b32 s19, s11
	; SI-NEXT: s_mov_b32 s22, s10			; SI-NEXT: s_mov_b32 s22, s10
	; SI-NEXT: s_mov_b32 s23, s11			; SI-NEXT: s_mov_b32 s23, s11
	; SI-NEXT: s_mov_b32 s12, s2			; SI-NEXT: s_mov_b32 s12, s2
	; SI-NEXT: s_mov_b32 s13, s3			; SI-NEXT: s_mov_b32 s13, s3
	; SI-NEXT: s_mov_b32 s16, s4			; SI-NEXT: s_mov_b32 s16, s4
	; SI-NEXT: s_mov_b32 s17, s5			; SI-NEXT: s_mov_b32 s17, s5
	; SI-NEXT: s_mov_b32 s20, s6			; SI-NEXT: s_mov_b32 s20, s6
	; SI-NEXT: s_mov_b32 s21, s7			; SI-NEXT: s_mov_b32 s21, s7
	; SI-NEXT: s_waitcnt vmcnt(0)			; SI-NEXT: s_waitcnt vmcnt(0)
	; SI-NEXT: buffer_store_dword v0, off, s[8:11], 0			; SI-NEXT: buffer_store_dword v0, off, s[8:11], 0
	; SI-NEXT: buffer_store_dword v0, off, s[12:15], 0			; SI-NEXT: buffer_store_dword v0, off, s[12:15], 0
	; SI-NEXT: buffer_store_dword v0, off, s[16:19], 0			; SI-NEXT: buffer_store_dword v0, off, s[16:19], 0
	; SI-NEXT: buffer_store_dword v0, off, s[20:23], 0			; SI-NEXT: buffer_store_dword v0, off, s[20:23], 0
	; SI-NEXT: s_endpgm			; SI-NEXT: s_endpgm
	;			;
	; VI-LABEL: test_copy_v4i8_x4:			; VI-LABEL: test_copy_v4i8_x4:
	; VI: ; %bb.0:			; VI: ; %bb.0:
				; VI-NEXT: s_mov_b32 s88, SCRATCH_RSRC_DWORD0
				; VI-NEXT: s_mov_b32 s89, SCRATCH_RSRC_DWORD1
				; VI-NEXT: s_mov_b32 s90, -1
				; VI-NEXT: s_mov_b32 s91, 0xe80000
				; VI-NEXT: s_add_u32 s88, s88, s3
	; VI-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x44			; VI-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x44
	; VI-NEXT: v_lshlrev_b32_e32 v0, 2, v0			; VI-NEXT: v_lshlrev_b32_e32 v0, 2, v0
	; VI-NEXT: s_mov_b32 s11, 0xf000			; VI-NEXT: s_mov_b32 s11, 0xf000
	; VI-NEXT: s_mov_b32 s10, -1			; VI-NEXT: s_mov_b32 s10, -1
	; VI-NEXT: s_mov_b32 s14, s10			; VI-NEXT: s_addc_u32 s89, s89, 0
	; VI-NEXT: s_waitcnt lgkmcnt(0)			; VI-NEXT: s_waitcnt lgkmcnt(0)
	; VI-NEXT: v_mov_b32_e32 v1, s3			; VI-NEXT: v_mov_b32_e32 v1, s3
	; VI-NEXT: v_add_u32_e32 v0, vcc, s2, v0			; VI-NEXT: v_add_u32_e32 v0, vcc, s2, v0
	; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc			; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
	; VI-NEXT: flat_load_dword v0, v[0:1]			; VI-NEXT: flat_load_dword v0, v[0:1]
	; VI-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x24			; VI-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x24
				; VI-NEXT: s_mov_b32 s14, s10
	; VI-NEXT: s_mov_b32 s15, s11			; VI-NEXT: s_mov_b32 s15, s11
	; VI-NEXT: s_mov_b32 s18, s10			; VI-NEXT: s_mov_b32 s18, s10
	; VI-NEXT: s_mov_b32 s19, s11			; VI-NEXT: s_mov_b32 s19, s11
	; VI-NEXT: s_mov_b32 s22, s10
	; VI-NEXT: s_waitcnt lgkmcnt(0)			; VI-NEXT: s_waitcnt lgkmcnt(0)
	; VI-NEXT: s_mov_b32 s8, s0			; VI-NEXT: s_mov_b32 s8, s0
	; VI-NEXT: s_mov_b32 s9, s1			; VI-NEXT: s_mov_b32 s9, s1
				; VI-NEXT: s_mov_b32 s22, s10
	; VI-NEXT: s_mov_b32 s23, s11			; VI-NEXT: s_mov_b32 s23, s11
	; VI-NEXT: s_mov_b32 s12, s2			; VI-NEXT: s_mov_b32 s12, s2
	; VI-NEXT: s_mov_b32 s13, s3			; VI-NEXT: s_mov_b32 s13, s3
	; VI-NEXT: s_mov_b32 s16, s4			; VI-NEXT: s_mov_b32 s16, s4
	; VI-NEXT: s_mov_b32 s17, s5			; VI-NEXT: s_mov_b32 s17, s5
	; VI-NEXT: s_mov_b32 s20, s6			; VI-NEXT: s_mov_b32 s20, s6
	; VI-NEXT: s_mov_b32 s21, s7			; VI-NEXT: s_mov_b32 s21, s7
	; VI-NEXT: s_waitcnt vmcnt(0)			; VI-NEXT: s_waitcnt vmcnt(0)
	▲ Show 20 Lines • Show All 423 Lines • Show Last 20 Lines

llvm/test/CodeGen/AMDGPU/copy-to-reg-scc-clobber.ll

	; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py			; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
	; RUN: llc -march=amdgcn -mcpu=gfx90a -O3 -pre-RA-sched=source < %s \| FileCheck -check-prefix=RRLIST %s			; RUN: llc -march=amdgcn -mcpu=gfx90a -O3 -pre-RA-sched=source < %s \| FileCheck -check-prefix=RRLIST %s
	; RUN: llc -march=amdgcn -mcpu=gfx90a -O3 -pre-RA-sched=fast < %s \| FileCheck -check-prefix=FAST %s			; RUN: llc -march=amdgcn -mcpu=gfx90a -O3 -pre-RA-sched=fast < %s \| FileCheck -check-prefix=FAST %s


	define protected amdgpu_kernel void @sccClobber(ptr addrspace(1) %a, ptr addrspace(1) %b, ptr addrspace(1) %e, ptr addrspace(1) %f, ptr addrspace(1) %pout.coerce) {			define protected amdgpu_kernel void @sccClobber(ptr addrspace(1) %a, ptr addrspace(1) %b, ptr addrspace(1) %e, ptr addrspace(1) %f, ptr addrspace(1) %pout.coerce) {
	; RRLIST-LABEL: sccClobber:			; RRLIST-LABEL: sccClobber:
	; RRLIST: ; %bb.0: ; %entry			; RRLIST: ; %bb.0: ; %entry
	; RRLIST-NEXT: s_load_dwordx8 s[4:11], s[0:1], 0x24			; RRLIST-NEXT: s_load_dwordx8 s[4:11], s[0:1], 0x24
	; RRLIST-NEXT: v_mov_b32_e32 v2, 0			; RRLIST-NEXT: s_mov_b32 s20, SCRATCH_RSRC_DWORD0
				; RRLIST-NEXT: s_mov_b32 s21, SCRATCH_RSRC_DWORD1
				; RRLIST-NEXT: s_mov_b32 s22, -1
				; RRLIST-NEXT: s_mov_b32 s23, 0xe00000
				; RRLIST-NEXT: s_add_u32 s20, s20, s3
	; RRLIST-NEXT: s_waitcnt lgkmcnt(0)			; RRLIST-NEXT: s_waitcnt lgkmcnt(0)
	; RRLIST-NEXT: s_load_dword s16, s[8:9], 0x0			; RRLIST-NEXT: s_load_dword s16, s[8:9], 0x0
	; RRLIST-NEXT: s_load_dwordx2 s[2:3], s[6:7], 0x0			; RRLIST-NEXT: s_load_dwordx2 s[2:3], s[6:7], 0x0
	; RRLIST-NEXT: s_load_dwordx2 s[12:13], s[4:5], 0x0			; RRLIST-NEXT: s_load_dwordx2 s[12:13], s[4:5], 0x0
	; RRLIST-NEXT: s_load_dwordx2 s[14:15], s[0:1], 0x44			; RRLIST-NEXT: s_load_dwordx2 s[14:15], s[0:1], 0x44
	; RRLIST-NEXT: s_load_dword s17, s[10:11], 0x0			; RRLIST-NEXT: s_load_dword s17, s[10:11], 0x0
				; RRLIST-NEXT: s_addc_u32 s21, s21, 0
	; RRLIST-NEXT: s_waitcnt lgkmcnt(0)			; RRLIST-NEXT: s_waitcnt lgkmcnt(0)
	; RRLIST-NEXT: s_min_i32 s4, s16, 0
	; RRLIST-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1]			; RRLIST-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1]
				; RRLIST-NEXT: s_min_i32 s4, s16, 0
	; RRLIST-NEXT: v_cmp_lt_i64_e32 vcc, s[12:13], v[0:1]			; RRLIST-NEXT: v_cmp_lt_i64_e32 vcc, s[12:13], v[0:1]
	; RRLIST-NEXT: s_and_b64 s[0:1], vcc, exec			; RRLIST-NEXT: s_and_b64 s[0:1], vcc, exec
	; RRLIST-NEXT: s_cselect_b32 s0, s16, s17			; RRLIST-NEXT: s_cselect_b32 s0, s16, s17
	; RRLIST-NEXT: s_cmp_eq_u64 s[12:13], s[2:3]			; RRLIST-NEXT: s_cmp_eq_u64 s[12:13], s[2:3]
	; RRLIST-NEXT: s_cselect_b32 s0, s4, s0			; RRLIST-NEXT: s_cselect_b32 s0, s4, s0
				; RRLIST-NEXT: v_mov_b32_e32 v2, 0
	; RRLIST-NEXT: v_mov_b32_e32 v0, s0			; RRLIST-NEXT: v_mov_b32_e32 v0, s0
	; RRLIST-NEXT: global_store_dword v2, v0, s[14:15]			; RRLIST-NEXT: global_store_dword v2, v0, s[14:15]
	; RRLIST-NEXT: s_endpgm			; RRLIST-NEXT: s_endpgm
	;			;
	; FAST-LABEL: sccClobber:			; FAST-LABEL: sccClobber:
	; FAST: ; %bb.0: ; %entry			; FAST: ; %bb.0: ; %entry
	; FAST-NEXT: s_load_dwordx8 s[4:11], s[0:1], 0x24			; FAST-NEXT: s_load_dwordx8 s[4:11], s[0:1], 0x24
	; FAST-NEXT: v_mov_b32_e32 v2, 0			; FAST-NEXT: s_mov_b32 s20, SCRATCH_RSRC_DWORD0
				; FAST-NEXT: s_mov_b32 s21, SCRATCH_RSRC_DWORD1
				; FAST-NEXT: s_mov_b32 s22, -1
				; FAST-NEXT: s_mov_b32 s23, 0xe00000
				; FAST-NEXT: s_add_u32 s20, s20, s3
	; FAST-NEXT: s_waitcnt lgkmcnt(0)			; FAST-NEXT: s_waitcnt lgkmcnt(0)
	; FAST-NEXT: s_load_dword s16, s[8:9], 0x0			; FAST-NEXT: s_load_dword s16, s[8:9], 0x0
	; FAST-NEXT: s_load_dwordx2 s[2:3], s[6:7], 0x0			; FAST-NEXT: s_load_dwordx2 s[2:3], s[6:7], 0x0
	; FAST-NEXT: s_load_dwordx2 s[12:13], s[4:5], 0x0			; FAST-NEXT: s_load_dwordx2 s[12:13], s[4:5], 0x0
	; FAST-NEXT: s_load_dwordx2 s[14:15], s[0:1], 0x44			; FAST-NEXT: s_load_dwordx2 s[14:15], s[0:1], 0x44
	; FAST-NEXT: s_load_dword s17, s[10:11], 0x0			; FAST-NEXT: s_load_dword s17, s[10:11], 0x0
				; FAST-NEXT: s_addc_u32 s21, s21, 0
	; FAST-NEXT: s_waitcnt lgkmcnt(0)			; FAST-NEXT: s_waitcnt lgkmcnt(0)
	; FAST-NEXT: s_min_i32 s4, s16, 0
	; FAST-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1]			; FAST-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1]
				; FAST-NEXT: s_min_i32 s4, s16, 0
	; FAST-NEXT: v_cmp_lt_i64_e32 vcc, s[12:13], v[0:1]			; FAST-NEXT: v_cmp_lt_i64_e32 vcc, s[12:13], v[0:1]
	; FAST-NEXT: s_and_b64 s[0:1], vcc, exec			; FAST-NEXT: s_and_b64 s[0:1], vcc, exec
	; FAST-NEXT: s_cselect_b32 s0, s16, s17			; FAST-NEXT: s_cselect_b32 s0, s16, s17
	; FAST-NEXT: s_cmp_eq_u64 s[12:13], s[2:3]			; FAST-NEXT: s_cmp_eq_u64 s[12:13], s[2:3]
	; FAST-NEXT: s_cselect_b32 s0, s4, s0			; FAST-NEXT: s_cselect_b32 s0, s4, s0
				; FAST-NEXT: v_mov_b32_e32 v2, 0
	; FAST-NEXT: v_mov_b32_e32 v0, s0			; FAST-NEXT: v_mov_b32_e32 v0, s0
	; FAST-NEXT: global_store_dword v2, v0, s[14:15]			; FAST-NEXT: global_store_dword v2, v0, s[14:15]
	; FAST-NEXT: s_endpgm			; FAST-NEXT: s_endpgm
	entry:			entry:
	%i = load i64, ptr addrspace(1) %a, align 8			%i = load i64, ptr addrspace(1) %a, align 8
	%i.1 = load i64, ptr addrspace(1) %b, align 8			%i.1 = load i64, ptr addrspace(1) %b, align 8
	%i.2 = load i32, ptr addrspace(1) %e, align 4			%i.2 = load i32, ptr addrspace(1) %e, align 4
	%i.3 = load i32, ptr addrspace(1) %f, align 4			%i.3 = load i32, ptr addrspace(1) %f, align 4
	Show All 10 Lines

llvm/test/CodeGen/AMDGPU/function-returns.ll

Show First 20 Lines • Show All 304 Lines • ▼ Show 20 Lines	define <4 x i64> @v4i64_func_void() #0 {
%ptr = load volatile <4 x i64> addrspace(1), <4 x i64> addrspace(1) addrspace(4)* undef		%ptr = load volatile <4 x i64> addrspace(1), <4 x i64> addrspace(1) addrspace(4)* undef
%val = load <4 x i64>, <4 x i64> addrspace(1)* %ptr		%val = load <4 x i64>, <4 x i64> addrspace(1)* %ptr
ret <4 x i64> %val		ret <4 x i64> %val
}		}

; GCN-LABEL: {{^}}v5i64_func_void:		; GCN-LABEL: {{^}}v5i64_func_void:
; GCN-DAG: buffer_load_dwordx4 v[0:3], off		; GCN-DAG: buffer_load_dwordx4 v[0:3], off
; GCN-DAG: buffer_load_dwordx4 v[4:7], off		; GCN-DAG: buffer_load_dwordx4 v[4:7], off
; GCN-DAG: buffer_load_dwordx4 v[8:11], off		; GCN-DAG: buffer_load_dwordx2 v[8:9], off
; GCN: s_waitcnt vmcnt(0)		; GCN: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64		; GCN-NEXT: s_setpc_b64
define <5 x i64> @v5i64_func_void() #0 {		define <5 x i64> @v5i64_func_void() #0 {
%ptr = load volatile <5 x i64> addrspace(1), <5 x i64> addrspace(1) addrspace(4)* undef		%ptr = load volatile <5 x i64> addrspace(1), <5 x i64> addrspace(1) addrspace(4)* undef
%val = load <5 x i64>, <5 x i64> addrspace(1)* %ptr		%val = load <5 x i64>, <5 x i64> addrspace(1)* %ptr
ret <5 x i64> %val		ret <5 x i64> %val
}		}

▲ Show 20 Lines • Show All 339 Lines • Show Last 20 Lines

llvm/test/CodeGen/AMDGPU/indirect-addressing-si.ll

	Show First 20 Lines • Show All 379 Lines • ▼ Show 20 Lines

	; FIXME: Should be able to fold zero input to movreld to inline imm?			; FIXME: Should be able to fold zero input to movreld to inline imm?

	; GCN-LABEL: {{^}}multi_same_block:			; GCN-LABEL: {{^}}multi_same_block:

	; GCN: s_load_dword [[ARG:s[0-9]+]]			; GCN: s_load_dword [[ARG:s[0-9]+]]

	; MOVREL: v_mov_b32_e32 v{{[0-9]+}}, 0x41900000			; MOVREL: v_mov_b32_e32 v{{[0-9]+}}, 0x41900000
				; MOVREL: v_mov_b32_e32 v{{[0-9]+}}, 0x41b0cccd
	; MOVREL: s_waitcnt			; MOVREL: s_waitcnt
	; MOVREL: s_add_i32 m0, [[ARG]], -16			; MOVREL: s_add_i32 m0, [[ARG]], -16
	; MOVREL: v_movreld_b32_e32 v{{[0-9]+}}, 4.0			; MOVREL: v_movreld_b32_e32 v{{[0-9]+}}, 4.0
	; MOVREL: v_mov_b32_e32 v{{[0-9]+}}, 0x41b0cccd
	; MOVREL: v_movreld_b32_e32 v{{[0-9]+}}, -4.0			; MOVREL: v_movreld_b32_e32 v{{[0-9]+}}, -4.0
	; MOVREL: s_mov_b32 m0, -1			; MOVREL: s_mov_b32 m0, -1


	; IDXMODE: v_mov_b32_e32 v{{[0-9]+}}, 0x41900000			; IDXMODE: v_mov_b32_e32 v{{[0-9]+}}, 0x41900000
				; IDXMODE: v_mov_b32_e32 v{{[0-9]+}}, 0x41b0cccd
	; IDXMODE: s_waitcnt			; IDXMODE: s_waitcnt
	; IDXMODE: s_add_i32 [[ARG]], [[ARG]], -16			; IDXMODE: s_add_i32 [[ARG]], [[ARG]], -16
	; IDXMODE: s_set_gpr_idx_on [[ARG]], gpr_idx(DST)
	; IDXMODE: v_mov_b32_e32 v{{[0-9]+}}, 4.0			; IDXMODE: v_mov_b32_e32 v{{[0-9]+}}, 4.0
	; IDXMODE: s_set_gpr_idx_off
	; IDXMODE: v_mov_b32_e32 v{{[0-9]+}}, 0x41b0cccd
	; IDXMODE: s_set_gpr_idx_on [[ARG]], gpr_idx(DST)
	; IDXMODE: v_mov_b32_e32 v{{[0-9]+}}, -4.0			; IDXMODE: v_mov_b32_e32 v{{[0-9]+}}, -4.0
	; IDXMODE: s_set_gpr_idx_off			; IDXMODE: s_set_gpr_idx_off

	; GCN: ds_write_b32			; GCN: ds_write_b32
	; GCN: ds_write_b32			; GCN: ds_write_b32
	; GCN: s_endpgm			; GCN: s_endpgm
	define amdgpu_kernel void @multi_same_block(i32 %arg) #0 {			define amdgpu_kernel void @multi_same_block(i32 %arg) #0 {
	bb:			bb:
	▲ Show 20 Lines • Show All 143 Lines • Show Last 20 Lines

llvm/test/CodeGen/AMDGPU/insert_vector_dynelt.ll

Show First 20 Lines • Show All 604 Lines • ▼ Show 20 Lines	entry:
%v = insertelement <2 x double> %vec, double 1.000000e+00, i32 %sel		%v = insertelement <2 x double> %vec, double 1.000000e+00, i32 %sel
store <2 x double> %v, <2 x double> addrspace(1)* %out		store <2 x double> %v, <2 x double> addrspace(1)* %out
ret void		ret void
}		}

define amdgpu_kernel void @double5_inselt(<5 x double> addrspace(1)* %out, <5 x double> %vec, i32 %sel) {		define amdgpu_kernel void @double5_inselt(<5 x double> addrspace(1)* %out, <5 x double> %vec, i32 %sel) {
; GCN-LABEL: double5_inselt:		; GCN-LABEL: double5_inselt:
; GCN: ; %bb.0: ; %entry		; GCN: ; %bb.0: ; %entry
		; GCN-NEXT: s_mov_b32 s16, SCRATCH_RSRC_DWORD0
		; GCN-NEXT: s_mov_b32 s17, SCRATCH_RSRC_DWORD1
		; GCN-NEXT: s_mov_b32 s18, -1
		; GCN-NEXT: s_mov_b32 s19, 0xe80000
		; GCN-NEXT: s_add_u32 s16, s16, s3
; GCN-NEXT: s_load_dword s12, s[0:1], 0xa4		; GCN-NEXT: s_load_dword s12, s[0:1], 0xa4
; GCN-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0x84		; GCN-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0x84
; GCN-NEXT: s_load_dwordx2 s[10:11], s[0:1], 0x24		; GCN-NEXT: s_load_dwordx2 s[10:11], s[0:1], 0x24
; GCN-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x64		; GCN-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x64
		; GCN-NEXT: s_addc_u32 s17, s17, 0
; GCN-NEXT: s_waitcnt lgkmcnt(0)		; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_cmp_eq_u32 s12, 4		; GCN-NEXT: s_cmp_eq_u32 s12, 4
; GCN-NEXT: s_cselect_b32 s9, 0x3ff00000, s9		; GCN-NEXT: s_cselect_b32 s9, 0x3ff00000, s9
; GCN-NEXT: s_cselect_b32 s8, 0, s8		; GCN-NEXT: s_cselect_b32 s8, 0, s8
; GCN-NEXT: s_cmp_eq_u32 s12, 1		; GCN-NEXT: s_cmp_eq_u32 s12, 1
; GCN-NEXT: s_cselect_b32 s3, 0x3ff00000, s3		; GCN-NEXT: s_cselect_b32 s3, 0x3ff00000, s3
; GCN-NEXT: s_cselect_b32 s2, 0, s2		; GCN-NEXT: s_cselect_b32 s2, 0, s2
; GCN-NEXT: s_cmp_eq_u32 s12, 0		; GCN-NEXT: s_cmp_eq_u32 s12, 0
; GCN-NEXT: v_mov_b32_e32 v4, s8		; GCN-NEXT: s_cselect_b32 s13, 0x3ff00000, s1
; GCN-NEXT: v_mov_b32_e32 v5, s9		; GCN-NEXT: s_cselect_b32 s14, 0, s0
; GCN-NEXT: s_cselect_b32 s8, 0x3ff00000, s1
; GCN-NEXT: s_cselect_b32 s9, 0, s0
; GCN-NEXT: s_cmp_eq_u32 s12, 3		; GCN-NEXT: s_cmp_eq_u32 s12, 3
; GCN-NEXT: s_cselect_b32 s0, 0x3ff00000, s7		; GCN-NEXT: s_cselect_b32 s0, 0x3ff00000, s7
; GCN-NEXT: s_cselect_b32 s1, 0, s6		; GCN-NEXT: s_cselect_b32 s1, 0, s6
; GCN-NEXT: s_cmp_eq_u32 s12, 2		; GCN-NEXT: s_cmp_eq_u32 s12, 2
; GCN-NEXT: s_cselect_b32 s5, 0x3ff00000, s5		; GCN-NEXT: s_cselect_b32 s5, 0x3ff00000, s5
; GCN-NEXT: s_cselect_b32 s4, 0, s4		; GCN-NEXT: s_cselect_b32 s4, 0, s4
; GCN-NEXT: v_mov_b32_e32 v3, s0		; GCN-NEXT: v_mov_b32_e32 v3, s0
; GCN-NEXT: s_add_u32 s0, s10, 16		; GCN-NEXT: s_add_u32 s0, s10, 16
; GCN-NEXT: v_mov_b32_e32 v2, s1		; GCN-NEXT: v_mov_b32_e32 v2, s1
; GCN-NEXT: s_addc_u32 s1, s11, 0		; GCN-NEXT: s_addc_u32 s1, s11, 0
; GCN-NEXT: v_mov_b32_e32 v7, s1		; GCN-NEXT: v_mov_b32_e32 v5, s1
; GCN-NEXT: v_mov_b32_e32 v0, s4		; GCN-NEXT: v_mov_b32_e32 v0, s4
; GCN-NEXT: v_mov_b32_e32 v1, s5		; GCN-NEXT: v_mov_b32_e32 v1, s5
; GCN-NEXT: v_mov_b32_e32 v6, s0		; GCN-NEXT: v_mov_b32_e32 v4, s0
; GCN-NEXT: flat_store_dwordx4 v[6:7], v[0:3]		; GCN-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GCN-NEXT: v_mov_b32_e32 v6, s10		; GCN-NEXT: v_mov_b32_e32 v4, s10
; GCN-NEXT: v_mov_b32_e32 v0, s9		; GCN-NEXT: s_add_u32 s0, s10, 32
; GCN-NEXT: v_mov_b32_e32 v1, s8		; GCN-NEXT: v_mov_b32_e32 v0, s14
		; GCN-NEXT: v_mov_b32_e32 v1, s13
; GCN-NEXT: v_mov_b32_e32 v2, s2		; GCN-NEXT: v_mov_b32_e32 v2, s2
; GCN-NEXT: v_mov_b32_e32 v3, s3		; GCN-NEXT: v_mov_b32_e32 v3, s3
; GCN-NEXT: v_mov_b32_e32 v7, s11		; GCN-NEXT: v_mov_b32_e32 v5, s11
; GCN-NEXT: s_add_u32 s0, s10, 32
; GCN-NEXT: flat_store_dwordx4 v[6:7], v[0:3]
; GCN-NEXT: s_addc_u32 s1, s11, 0		; GCN-NEXT: s_addc_u32 s1, s11, 0
; GCN-NEXT: v_mov_b32_e32 v0, s0		; GCN-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GCN-NEXT: v_mov_b32_e32 v1, s1		; GCN-NEXT: s_nop 0
; GCN-NEXT: flat_store_dwordx2 v[0:1], v[4:5]		; GCN-NEXT: v_mov_b32_e32 v3, s1
		; GCN-NEXT: v_mov_b32_e32 v0, s8
		; GCN-NEXT: v_mov_b32_e32 v1, s9
		; GCN-NEXT: v_mov_b32_e32 v2, s0
		; GCN-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
; GCN-NEXT: s_endpgm		; GCN-NEXT: s_endpgm
entry:		entry:
%v = insertelement <5 x double> %vec, double 1.000000e+00, i32 %sel		%v = insertelement <5 x double> %vec, double 1.000000e+00, i32 %sel
store <5 x double> %v, <5 x double> addrspace(1)* %out		store <5 x double> %v, <5 x double> addrspace(1)* %out
ret void		ret void
}		}

define amdgpu_kernel void @double8_inselt(<8 x double> addrspace(1)* %out, <8 x double> %vec, i32 %sel) {		define amdgpu_kernel void @double8_inselt(<8 x double> addrspace(1)* %out, <8 x double> %vec, i32 %sel) {
▲ Show 20 Lines • Show All 1,376 Lines • Show Last 20 Lines

llvm/test/CodeGen/AMDGPU/insert_vector_elt.ll

	Show First 20 Lines • Show All 388 Lines • ▼ Show 20 Lines
	; GCN-NEXT: s_waitcnt vmcnt(0)			; GCN-NEXT: s_waitcnt vmcnt(0)
	; GCN-NEXT: s_setpc_b64 s[30:31]			; GCN-NEXT: s_setpc_b64 s[30:31]
	%tmp = load <4 x i32>, <4 x i32> addrspace(4)* undef			%tmp = load <4 x i32>, <4 x i32> addrspace(4)* undef
	%tmp1 = insertelement <4 x i32> %tmp, i32 0, i32 0			%tmp1 = insertelement <4 x i32> %tmp, i32 0, i32 0
	%tmp2 = call <4 x float> @llvm.amdgcn.image.gather4.lz.2d.v4f32.f32(i32 1, float undef, float undef, <8 x i32> undef, <4 x i32> %tmp1, i1 0, i32 0, i32 0)			%tmp2 = call <4 x float> @llvm.amdgcn.image.gather4.lz.2d.v4f32.f32(i32 1, float undef, float undef, <8 x i32> undef, <4 x i32> %tmp1, i1 0, i32 0, i32 0)
	ret <4 x float> %tmp2			ret <4 x float> %tmp2
	}			}

				define <9 x float> @insertelement_to_v9f32_undef() nounwind {
				; GCN-LABEL: insertelement_to_v9f32_undef:
				; GCN: ; %bb.0:
				; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
				; GCN-NEXT: s_load_dwordx8 s[4:11], s[4:5], 0x0
				; GCN-NEXT: v_mov_b32_e32 v0, 0x40a00000
				; GCN-NEXT: s_waitcnt lgkmcnt(0)
				; GCN-NEXT: s_load_dword s4, s[4:5], 0x0
				; GCN-NEXT: v_mov_b32_e32 v1, s5
				; GCN-NEXT: v_mov_b32_e32 v2, s6
				; GCN-NEXT: v_mov_b32_e32 v3, s7
				; GCN-NEXT: v_mov_b32_e32 v4, s8
				; GCN-NEXT: v_mov_b32_e32 v5, s9
				; GCN-NEXT: v_mov_b32_e32 v6, s10
				; GCN-NEXT: v_mov_b32_e32 v7, s11
				; GCN-NEXT: s_waitcnt lgkmcnt(0)
				; GCN-NEXT: v_mov_b32_e32 v8, s4
				; GCN-NEXT: s_setpc_b64 s[30:31]
				%tmp = load <9 x float>, <9 x float> addrspace(4)* undef
				%tmp1 = insertelement <9 x float> %tmp, float 5.000, i32 0
				ret <9 x float> %tmp1
				}

				define <10 x float> @insertelement_to_v10f32_undef() nounwind {
				; GCN-LABEL: insertelement_to_v10f32_undef:
				; GCN: ; %bb.0:
				; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
				; GCN-NEXT: s_load_dwordx8 s[4:11], s[4:5], 0x0
				; GCN-NEXT: v_mov_b32_e32 v0, 2.0
				; GCN-NEXT: s_waitcnt lgkmcnt(0)
				; GCN-NEXT: s_load_dwordx2 s[12:13], s[4:5], 0x0
				; GCN-NEXT: v_mov_b32_e32 v1, s5
				; GCN-NEXT: v_mov_b32_e32 v2, s6
				; GCN-NEXT: v_mov_b32_e32 v3, s7
				; GCN-NEXT: v_mov_b32_e32 v4, s8
				; GCN-NEXT: v_mov_b32_e32 v5, s9
				; GCN-NEXT: v_mov_b32_e32 v6, s10
				; GCN-NEXT: v_mov_b32_e32 v7, s11
				; GCN-NEXT: s_waitcnt lgkmcnt(0)
				; GCN-NEXT: v_mov_b32_e32 v8, s12
				; GCN-NEXT: v_mov_b32_e32 v9, s13
				; GCN-NEXT: s_setpc_b64 s[30:31]
				%tmp = load <10 x float>, <10 x float> addrspace(4)* undef
				%tmp1 = insertelement <10 x float> %tmp, float 2.0, i32 0
				ret <10 x float> %tmp1
				}

				define <11 x float> @insertelement_to_v11f32_undef() nounwind {
				; GCN-LABEL: insertelement_to_v11f32_undef:
				; GCN: ; %bb.0:
				; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
				; GCN-NEXT: s_load_dwordx8 s[4:11], s[4:5], 0x0
				; GCN-NEXT: v_mov_b32_e32 v0, 0x40a00000
				; GCN-NEXT: s_waitcnt lgkmcnt(0)
				; GCN-NEXT: s_load_dwordx4 s[12:15], s[4:5], 0x0
				; GCN-NEXT: v_mov_b32_e32 v1, s5
				; GCN-NEXT: v_mov_b32_e32 v2, s6
				; GCN-NEXT: v_mov_b32_e32 v3, s7
				; GCN-NEXT: v_mov_b32_e32 v4, s8
				; GCN-NEXT: v_mov_b32_e32 v5, s9
				; GCN-NEXT: v_mov_b32_e32 v6, s10
				; GCN-NEXT: v_mov_b32_e32 v7, s11
				; GCN-NEXT: s_waitcnt lgkmcnt(0)
				; GCN-NEXT: v_mov_b32_e32 v8, s12
				; GCN-NEXT: v_mov_b32_e32 v9, s13
				; GCN-NEXT: v_mov_b32_e32 v10, s14
				; GCN-NEXT: s_setpc_b64 s[30:31]
				%tmp = load <11 x float>, <11 x float> addrspace(4)* undef
				%tmp1 = insertelement <11 x float> %tmp, float 5.000, i32 0
				ret <11 x float> %tmp1
				}

				define <12 x float> @insertelement_to_v12f32_undef() nounwind {
				; GCN-LABEL: insertelement_to_v12f32_undef:
				; GCN: ; %bb.0:
				; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
				; GCN-NEXT: s_load_dwordx8 s[4:11], s[4:5], 0x0
				; GCN-NEXT: v_mov_b32_e32 v0, 2.0
				; GCN-NEXT: s_waitcnt lgkmcnt(0)
				; GCN-NEXT: s_load_dwordx4 s[12:15], s[4:5], 0x0
				; GCN-NEXT: v_mov_b32_e32 v1, s5
				; GCN-NEXT: v_mov_b32_e32 v2, s6
				; GCN-NEXT: v_mov_b32_e32 v3, s7
				; GCN-NEXT: v_mov_b32_e32 v4, s8
				; GCN-NEXT: v_mov_b32_e32 v5, s9
				; GCN-NEXT: v_mov_b32_e32 v6, s10
				; GCN-NEXT: v_mov_b32_e32 v7, s11
				; GCN-NEXT: s_waitcnt lgkmcnt(0)
				; GCN-NEXT: v_mov_b32_e32 v8, s12
				; GCN-NEXT: v_mov_b32_e32 v9, s13
				; GCN-NEXT: v_mov_b32_e32 v10, s14
				; GCN-NEXT: v_mov_b32_e32 v11, s15
				; GCN-NEXT: s_setpc_b64 s[30:31]
				%tmp = load <12 x float>, <12 x float> addrspace(4)* undef
				%tmp1 = insertelement <12 x float> %tmp, float 2.0, i32 0
				ret <12 x float> %tmp1
				}

	define amdgpu_kernel void @dynamic_insertelement_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %a, i32 %b) nounwind {			define amdgpu_kernel void @dynamic_insertelement_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %a, i32 %b) nounwind {
	; SI-LABEL: dynamic_insertelement_v2f32:			; SI-LABEL: dynamic_insertelement_v2f32:
	; SI: ; %bb.0:			; SI: ; %bb.0:
	; SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0			; SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
	; SI-NEXT: s_load_dword s8, s[4:5], 0x4			; SI-NEXT: s_load_dword s8, s[4:5], 0x4
	; SI-NEXT: v_mov_b32_e32 v0, 0x40a00000			; SI-NEXT: v_mov_b32_e32 v0, 0x40a00000
	; SI-NEXT: s_mov_b32 s7, 0x100f000			; SI-NEXT: s_mov_b32 s7, 0x100f000
	; SI-NEXT: s_mov_b32 s6, -1			; SI-NEXT: s_mov_b32 s6, -1
	▲ Show 20 Lines • Show All 196 Lines • ▼ Show 20 Lines
	; VI-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:16			; VI-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:16
	; VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0			; VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0
	; VI-NEXT: s_endpgm			; VI-NEXT: s_endpgm
	%vecins = insertelement <8 x float> %a, float 5.000000e+00, i32 %b			%vecins = insertelement <8 x float> %a, float 5.000000e+00, i32 %b
	store <8 x float> %vecins, <8 x float> addrspace(1)* %out, align 32			store <8 x float> %vecins, <8 x float> addrspace(1)* %out, align 32
	ret void			ret void
	}			}

				define amdgpu_kernel void @dynamic_insertelement_v9f32(<9 x float> addrspace(1)* %out, <9 x float> %a, i32 %b) nounwind {
				; SI-LABEL: dynamic_insertelement_v9f32:
				; SI: ; %bb.0:
				; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
				; SI-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x10
				; SI-NEXT: s_load_dword s6, s[4:5], 0x18
				; SI-NEXT: s_load_dword s4, s[4:5], 0x20
				; SI-NEXT: v_mov_b32_e32 v9, 0x40a00000
				; SI-NEXT: s_mov_b32 s3, 0x100f000
				; SI-NEXT: s_waitcnt lgkmcnt(0)
				; SI-NEXT: v_mov_b32_e32 v0, s8
				; SI-NEXT: v_mov_b32_e32 v1, s9
				; SI-NEXT: v_mov_b32_e32 v2, s10
				; SI-NEXT: v_mov_b32_e32 v3, s11
				; SI-NEXT: v_mov_b32_e32 v4, s12
				; SI-NEXT: v_mov_b32_e32 v5, s13
				; SI-NEXT: v_mov_b32_e32 v6, s14
				; SI-NEXT: v_mov_b32_e32 v7, s15
				; SI-NEXT: v_mov_b32_e32 v8, s6
				; SI-NEXT: s_mov_b32 m0, s4
				; SI-NEXT: s_mov_b32 s2, -1
				; SI-NEXT: v_movreld_b32_e32 v0, v9
				; SI-NEXT: buffer_store_dword v8, off, s[0:3], 0 offset:32
				; SI-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:16
				; SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0
				; SI-NEXT: s_endpgm
				;
				; VI-LABEL: dynamic_insertelement_v9f32:
				; VI: ; %bb.0:
				; VI-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x40
				; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
				; VI-NEXT: s_load_dword s6, s[4:5], 0x60
				; VI-NEXT: s_load_dword s4, s[4:5], 0x80
				; VI-NEXT: v_mov_b32_e32 v9, 0x40a00000
				; VI-NEXT: s_waitcnt lgkmcnt(0)
				; VI-NEXT: v_mov_b32_e32 v0, s8
				; VI-NEXT: v_mov_b32_e32 v1, s9
				; VI-NEXT: v_mov_b32_e32 v2, s10
				; VI-NEXT: v_mov_b32_e32 v3, s11
				; VI-NEXT: v_mov_b32_e32 v4, s12
				; VI-NEXT: v_mov_b32_e32 v5, s13
				; VI-NEXT: v_mov_b32_e32 v6, s14
				; VI-NEXT: v_mov_b32_e32 v7, s15
				; VI-NEXT: v_mov_b32_e32 v8, s6
				; VI-NEXT: s_mov_b32 m0, s4
				; VI-NEXT: s_mov_b32 s3, 0x1100f000
				; VI-NEXT: s_mov_b32 s2, -1
				; VI-NEXT: v_movreld_b32_e32 v0, v9
				; VI-NEXT: buffer_store_dword v8, off, s[0:3], 0 offset:32
				; VI-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:16
				; VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0
				; VI-NEXT: s_endpgm
				%vecins = insertelement <9 x float> %a, float 5.000000e+00, i32 %b
				store <9 x float> %vecins, <9 x float> addrspace(1)* %out, align 32
				ret void
				}

				define amdgpu_kernel void @dynamic_insertelement_v10f32(<10 x float> addrspace(1)* %out, <10 x float> %a, i32 %b) nounwind {
				; SI-LABEL: dynamic_insertelement_v10f32:
				; SI: ; %bb.0:
				; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
				; SI-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x10
				; SI-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x18
				; SI-NEXT: s_load_dword s4, s[4:5], 0x20
				; SI-NEXT: v_mov_b32_e32 v10, 0x40a00000
				; SI-NEXT: s_mov_b32 s3, 0x100f000
				; SI-NEXT: s_waitcnt lgkmcnt(0)
				; SI-NEXT: v_mov_b32_e32 v0, s8
				; SI-NEXT: v_mov_b32_e32 v1, s9
				; SI-NEXT: v_mov_b32_e32 v2, s10
				; SI-NEXT: v_mov_b32_e32 v3, s11
				; SI-NEXT: v_mov_b32_e32 v4, s12
				; SI-NEXT: v_mov_b32_e32 v5, s13
				; SI-NEXT: v_mov_b32_e32 v6, s14
				; SI-NEXT: v_mov_b32_e32 v7, s15
				; SI-NEXT: v_mov_b32_e32 v8, s6
				; SI-NEXT: v_mov_b32_e32 v9, s7
				; SI-NEXT: s_mov_b32 m0, s4
				; SI-NEXT: s_mov_b32 s2, -1
				; SI-NEXT: v_movreld_b32_e32 v0, v10
				; SI-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:16
				; SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0
				; SI-NEXT: buffer_store_dwordx2 v[8:9], off, s[0:3], 0 offset:32
				; SI-NEXT: s_endpgm
				;
				; VI-LABEL: dynamic_insertelement_v10f32:
				; VI: ; %bb.0:
				; VI-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x40
				; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
				; VI-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x60
				; VI-NEXT: s_load_dword s4, s[4:5], 0x80
				; VI-NEXT: v_mov_b32_e32 v10, 0x40a00000
				; VI-NEXT: s_waitcnt lgkmcnt(0)
				; VI-NEXT: v_mov_b32_e32 v0, s8
				; VI-NEXT: v_mov_b32_e32 v1, s9
				; VI-NEXT: v_mov_b32_e32 v2, s10
				; VI-NEXT: v_mov_b32_e32 v3, s11
				; VI-NEXT: v_mov_b32_e32 v4, s12
				; VI-NEXT: v_mov_b32_e32 v5, s13
				; VI-NEXT: v_mov_b32_e32 v6, s14
				; VI-NEXT: v_mov_b32_e32 v7, s15
				; VI-NEXT: v_mov_b32_e32 v8, s6
				; VI-NEXT: v_mov_b32_e32 v9, s7
				; VI-NEXT: s_mov_b32 m0, s4
				; VI-NEXT: s_mov_b32 s3, 0x1100f000
				; VI-NEXT: s_mov_b32 s2, -1
				; VI-NEXT: v_movreld_b32_e32 v0, v10
				; VI-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:16
				; VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0
				; VI-NEXT: buffer_store_dwordx2 v[8:9], off, s[0:3], 0 offset:32
				; VI-NEXT: s_endpgm
				%vecins = insertelement <10 x float> %a, float 5.000000e+00, i32 %b
				store <10 x float> %vecins, <10 x float> addrspace(1)* %out, align 32
				ret void
				}

				define amdgpu_kernel void @dynamic_insertelement_v11f32(<11 x float> addrspace(1)* %out, <11 x float> %a, i32 %b) nounwind {
				; SI-LABEL: dynamic_insertelement_v11f32:
				; SI: ; %bb.0:
				; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
				; SI-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x10
				; SI-NEXT: s_load_dwordx4 s[16:19], s[4:5], 0x18
				; SI-NEXT: s_load_dword s4, s[4:5], 0x20
				; SI-NEXT: v_mov_b32_e32 v11, 0x40a00000
				; SI-NEXT: s_mov_b32 s3, 0x100f000
				; SI-NEXT: s_waitcnt lgkmcnt(0)
				; SI-NEXT: v_mov_b32_e32 v0, s8
				; SI-NEXT: v_mov_b32_e32 v1, s9
				; SI-NEXT: v_mov_b32_e32 v2, s10
				; SI-NEXT: v_mov_b32_e32 v3, s11
				; SI-NEXT: v_mov_b32_e32 v4, s12
				; SI-NEXT: v_mov_b32_e32 v5, s13
				; SI-NEXT: v_mov_b32_e32 v6, s14
				; SI-NEXT: v_mov_b32_e32 v7, s15
				; SI-NEXT: v_mov_b32_e32 v8, s16
				; SI-NEXT: v_mov_b32_e32 v9, s17
				; SI-NEXT: v_mov_b32_e32 v10, s18
				; SI-NEXT: s_mov_b32 m0, s4
				; SI-NEXT: s_mov_b32 s2, -1
				; SI-NEXT: v_movreld_b32_e32 v0, v11
				; SI-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:16
				; SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0
				; SI-NEXT: buffer_store_dwordx3 v[8:10], off, s[0:3], 0 offset:32
				; SI-NEXT: s_endpgm
				;
				; VI-LABEL: dynamic_insertelement_v11f32:
				; VI: ; %bb.0:
				; VI-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x40
				; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
				; VI-NEXT: v_mov_b32_e32 v11, 0x40a00000
				; VI-NEXT: s_mov_b32 s3, 0x1100f000
				; VI-NEXT: s_mov_b32 s2, -1
				; VI-NEXT: s_waitcnt lgkmcnt(0)
				; VI-NEXT: v_mov_b32_e32 v0, s8
				; VI-NEXT: v_mov_b32_e32 v1, s9
				; VI-NEXT: v_mov_b32_e32 v2, s10
				; VI-NEXT: v_mov_b32_e32 v3, s11
				; VI-NEXT: s_load_dwordx4 s[8:11], s[4:5], 0x60
				; VI-NEXT: s_load_dword s4, s[4:5], 0x80
				; VI-NEXT: v_mov_b32_e32 v4, s12
				; VI-NEXT: v_mov_b32_e32 v5, s13
				; VI-NEXT: v_mov_b32_e32 v6, s14
				; VI-NEXT: v_mov_b32_e32 v7, s15
				; VI-NEXT: s_waitcnt lgkmcnt(0)
				; VI-NEXT: v_mov_b32_e32 v8, s8
				; VI-NEXT: v_mov_b32_e32 v9, s9
				; VI-NEXT: v_mov_b32_e32 v10, s10
				; VI-NEXT: s_mov_b32 m0, s4
				; VI-NEXT: v_movreld_b32_e32 v0, v11
				; VI-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:16
				; VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0
				; VI-NEXT: buffer_store_dwordx3 v[8:10], off, s[0:3], 0 offset:32
				; VI-NEXT: s_endpgm
				%vecins = insertelement <11 x float> %a, float 5.000000e+00, i32 %b
				store <11 x float> %vecins, <11 x float> addrspace(1)* %out, align 32
				ret void
				}

				define amdgpu_kernel void @dynamic_insertelement_v12f32(<12 x float> addrspace(1)* %out, <12 x float> %a, i32 %b) nounwind {
				; SI-LABEL: dynamic_insertelement_v12f32:
				; SI: ; %bb.0:
				; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
				; SI-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x10
				; SI-NEXT: s_load_dwordx4 s[16:19], s[4:5], 0x18
				; SI-NEXT: s_load_dword s4, s[4:5], 0x20
				; SI-NEXT: v_mov_b32_e32 v12, 0x40a00000
				; SI-NEXT: s_mov_b32 s3, 0x100f000
				; SI-NEXT: s_waitcnt lgkmcnt(0)
				; SI-NEXT: v_mov_b32_e32 v0, s8
				; SI-NEXT: v_mov_b32_e32 v1, s9
				; SI-NEXT: v_mov_b32_e32 v2, s10
				; SI-NEXT: v_mov_b32_e32 v3, s11
				; SI-NEXT: v_mov_b32_e32 v4, s12
				; SI-NEXT: v_mov_b32_e32 v5, s13
				; SI-NEXT: v_mov_b32_e32 v6, s14
				; SI-NEXT: v_mov_b32_e32 v7, s15
				; SI-NEXT: v_mov_b32_e32 v8, s16
				; SI-NEXT: v_mov_b32_e32 v9, s17
				; SI-NEXT: v_mov_b32_e32 v10, s18
				; SI-NEXT: v_mov_b32_e32 v11, s19
				; SI-NEXT: s_mov_b32 m0, s4
				; SI-NEXT: s_mov_b32 s2, -1
				; SI-NEXT: v_movreld_b32_e32 v0, v12
				; SI-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:16
				; SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0
				; SI-NEXT: buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:32
				; SI-NEXT: s_endpgm
				;
				; VI-LABEL: dynamic_insertelement_v12f32:
				; VI: ; %bb.0:
				; VI-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x40
				; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
				; VI-NEXT: v_mov_b32_e32 v12, 0x40a00000
				; VI-NEXT: s_mov_b32 s3, 0x1100f000
				; VI-NEXT: s_mov_b32 s2, -1
				; VI-NEXT: s_waitcnt lgkmcnt(0)
				; VI-NEXT: v_mov_b32_e32 v0, s8
				; VI-NEXT: v_mov_b32_e32 v1, s9
				; VI-NEXT: v_mov_b32_e32 v2, s10
				; VI-NEXT: v_mov_b32_e32 v3, s11
				; VI-NEXT: s_load_dwordx4 s[8:11], s[4:5], 0x60
				; VI-NEXT: s_load_dword s4, s[4:5], 0x80
				; VI-NEXT: v_mov_b32_e32 v4, s12
				; VI-NEXT: v_mov_b32_e32 v5, s13
				; VI-NEXT: v_mov_b32_e32 v6, s14
				; VI-NEXT: v_mov_b32_e32 v7, s15
				; VI-NEXT: s_waitcnt lgkmcnt(0)
				; VI-NEXT: v_mov_b32_e32 v8, s8
				; VI-NEXT: v_mov_b32_e32 v9, s9
				; VI-NEXT: v_mov_b32_e32 v10, s10
				; VI-NEXT: v_mov_b32_e32 v11, s11
				; VI-NEXT: s_mov_b32 m0, s4
				; VI-NEXT: v_movreld_b32_e32 v0, v12
				; VI-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:16
				; VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0
				; VI-NEXT: buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:32
				; VI-NEXT: s_endpgm
				%vecins = insertelement <12 x float> %a, float 5.000000e+00, i32 %b
				store <12 x float> %vecins, <12 x float> addrspace(1)* %out, align 32
				ret void
				}

	define amdgpu_kernel void @dynamic_insertelement_v16f32(<16 x float> addrspace(1)* %out, <16 x float> %a, i32 %b) nounwind {			define amdgpu_kernel void @dynamic_insertelement_v16f32(<16 x float> addrspace(1)* %out, <16 x float> %a, i32 %b) nounwind {
	; SI-LABEL: dynamic_insertelement_v16f32:			; SI-LABEL: dynamic_insertelement_v16f32:
	; SI: ; %bb.0:			; SI: ; %bb.0:
	; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0			; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
	; SI-NEXT: s_load_dwordx16 s[8:23], s[4:5], 0x10			; SI-NEXT: s_load_dwordx16 s[8:23], s[4:5], 0x10
	; SI-NEXT: s_load_dword s4, s[4:5], 0x20			; SI-NEXT: s_load_dword s4, s[4:5], 0x20
	; SI-NEXT: v_mov_b32_e32 v16, 0x40a00000			; SI-NEXT: v_mov_b32_e32 v16, 0x40a00000
	; SI-NEXT: s_mov_b32 s3, 0x100f000			; SI-NEXT: s_mov_b32 s3, 0x100f000
	▲ Show 20 Lines • Show All 243 Lines • ▼ Show 20 Lines
	; VI-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:16			; VI-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:16
	; VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0			; VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0
	; VI-NEXT: s_endpgm			; VI-NEXT: s_endpgm
	%vecins = insertelement <8 x i32> %a, i32 5, i32 %b			%vecins = insertelement <8 x i32> %a, i32 5, i32 %b
	store <8 x i32> %vecins, <8 x i32> addrspace(1)* %out, align 32			store <8 x i32> %vecins, <8 x i32> addrspace(1)* %out, align 32
	ret void			ret void
	}			}

				define amdgpu_kernel void @dynamic_insertelement_v9i32(<9 x i32> addrspace(1)* %out, <9 x i32> %a, i32 %b) nounwind {
				; SI-LABEL: dynamic_insertelement_v9i32:
				; SI: ; %bb.0:
				; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
				; SI-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x10
				; SI-NEXT: s_load_dword s6, s[4:5], 0x18
				; SI-NEXT: s_load_dword s4, s[4:5], 0x20
				; SI-NEXT: s_mov_b32 s3, 0x100f000
				; SI-NEXT: s_mov_b32 s2, -1
				; SI-NEXT: s_waitcnt lgkmcnt(0)
				; SI-NEXT: v_mov_b32_e32 v0, s8
				; SI-NEXT: v_mov_b32_e32 v1, s9
				; SI-NEXT: v_mov_b32_e32 v2, s10
				; SI-NEXT: v_mov_b32_e32 v3, s11
				; SI-NEXT: v_mov_b32_e32 v4, s12
				; SI-NEXT: v_mov_b32_e32 v5, s13
				; SI-NEXT: v_mov_b32_e32 v6, s14
				; SI-NEXT: v_mov_b32_e32 v7, s15
				; SI-NEXT: v_mov_b32_e32 v8, s6
				; SI-NEXT: s_mov_b32 m0, s4
				; SI-NEXT: v_movreld_b32_e32 v0, 5
				; SI-NEXT: buffer_store_dword v8, off, s[0:3], 0 offset:32
				; SI-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:16
				; SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0
				; SI-NEXT: s_endpgm
				;
				; VI-LABEL: dynamic_insertelement_v9i32:
				; VI: ; %bb.0:
				; VI-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x40
				; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
				; VI-NEXT: s_load_dword s6, s[4:5], 0x60
				; VI-NEXT: s_load_dword s4, s[4:5], 0x80
				; VI-NEXT: s_mov_b32 s3, 0x1100f000
				; VI-NEXT: s_mov_b32 s2, -1
				; VI-NEXT: s_waitcnt lgkmcnt(0)
				; VI-NEXT: v_mov_b32_e32 v0, s8
				; VI-NEXT: v_mov_b32_e32 v1, s9
				; VI-NEXT: v_mov_b32_e32 v2, s10
				; VI-NEXT: v_mov_b32_e32 v3, s11
				; VI-NEXT: v_mov_b32_e32 v4, s12
				; VI-NEXT: v_mov_b32_e32 v5, s13
				; VI-NEXT: v_mov_b32_e32 v6, s14
				; VI-NEXT: v_mov_b32_e32 v7, s15
				; VI-NEXT: v_mov_b32_e32 v8, s6
				; VI-NEXT: s_mov_b32 m0, s4
				; VI-NEXT: v_movreld_b32_e32 v0, 5
				; VI-NEXT: buffer_store_dword v8, off, s[0:3], 0 offset:32
				; VI-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:16
				; VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0
				; VI-NEXT: s_endpgm
				%vecins = insertelement <9 x i32> %a, i32 5, i32 %b
				store <9 x i32> %vecins, <9 x i32> addrspace(1)* %out, align 32
				ret void
				}

				define amdgpu_kernel void @dynamic_insertelement_v10i32(<10 x i32> addrspace(1)* %out, <10 x i32> %a, i32 %b) nounwind {
				; SI-LABEL: dynamic_insertelement_v10i32:
				; SI: ; %bb.0:
				; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
				; SI-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x10
				; SI-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x18
				; SI-NEXT: s_load_dword s4, s[4:5], 0x20
				; SI-NEXT: s_mov_b32 s3, 0x100f000
				; SI-NEXT: s_mov_b32 s2, -1
				; SI-NEXT: s_waitcnt lgkmcnt(0)
				; SI-NEXT: v_mov_b32_e32 v0, s8
				; SI-NEXT: v_mov_b32_e32 v1, s9
				; SI-NEXT: v_mov_b32_e32 v2, s10
				; SI-NEXT: v_mov_b32_e32 v3, s11
				; SI-NEXT: v_mov_b32_e32 v4, s12
				; SI-NEXT: v_mov_b32_e32 v5, s13
				; SI-NEXT: v_mov_b32_e32 v6, s14
				; SI-NEXT: v_mov_b32_e32 v7, s15
				; SI-NEXT: v_mov_b32_e32 v8, s6
				; SI-NEXT: v_mov_b32_e32 v9, s7
				; SI-NEXT: s_mov_b32 m0, s4
				; SI-NEXT: v_movreld_b32_e32 v0, 5
				; SI-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:16
				; SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0
				; SI-NEXT: buffer_store_dwordx2 v[8:9], off, s[0:3], 0 offset:32
				; SI-NEXT: s_endpgm
				;
				; VI-LABEL: dynamic_insertelement_v10i32:
				; VI: ; %bb.0:
				; VI-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x40
				; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
				; VI-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x60
				; VI-NEXT: s_load_dword s4, s[4:5], 0x80
				; VI-NEXT: s_mov_b32 s3, 0x1100f000
				; VI-NEXT: s_waitcnt lgkmcnt(0)
				; VI-NEXT: v_mov_b32_e32 v0, s8
				; VI-NEXT: v_mov_b32_e32 v1, s9
				; VI-NEXT: v_mov_b32_e32 v2, s10
				; VI-NEXT: v_mov_b32_e32 v3, s11
				; VI-NEXT: v_mov_b32_e32 v4, s12
				; VI-NEXT: v_mov_b32_e32 v5, s13
				; VI-NEXT: v_mov_b32_e32 v6, s14
				; VI-NEXT: v_mov_b32_e32 v7, s15
				; VI-NEXT: v_mov_b32_e32 v8, s6
				; VI-NEXT: v_mov_b32_e32 v9, s7
				; VI-NEXT: s_mov_b32 m0, s4
				; VI-NEXT: s_mov_b32 s2, -1
				; VI-NEXT: v_movreld_b32_e32 v0, 5
				; VI-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:16
				; VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0
				; VI-NEXT: buffer_store_dwordx2 v[8:9], off, s[0:3], 0 offset:32
				; VI-NEXT: s_endpgm
				%vecins = insertelement <10 x i32> %a, i32 5, i32 %b
				store <10 x i32> %vecins, <10 x i32> addrspace(1)* %out, align 32
				ret void
				}

				define amdgpu_kernel void @dynamic_insertelement_v11i32(<11 x i32> addrspace(1)* %out, <11 x i32> %a, i32 %b) nounwind {
				; SI-LABEL: dynamic_insertelement_v11i32:
				; SI: ; %bb.0:
				; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
				; SI-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x10
				; SI-NEXT: s_load_dwordx4 s[16:19], s[4:5], 0x18
				; SI-NEXT: s_load_dword s4, s[4:5], 0x20
				; SI-NEXT: s_mov_b32 s3, 0x100f000
				; SI-NEXT: s_mov_b32 s2, -1
				; SI-NEXT: s_waitcnt lgkmcnt(0)
				; SI-NEXT: v_mov_b32_e32 v0, s8
				; SI-NEXT: v_mov_b32_e32 v1, s9
				; SI-NEXT: v_mov_b32_e32 v2, s10
				; SI-NEXT: v_mov_b32_e32 v3, s11
				; SI-NEXT: v_mov_b32_e32 v4, s12
				; SI-NEXT: v_mov_b32_e32 v5, s13
				; SI-NEXT: v_mov_b32_e32 v6, s14
				; SI-NEXT: v_mov_b32_e32 v7, s15
				; SI-NEXT: v_mov_b32_e32 v8, s16
				; SI-NEXT: v_mov_b32_e32 v9, s17
				; SI-NEXT: v_mov_b32_e32 v10, s18
				; SI-NEXT: s_mov_b32 m0, s4
				; SI-NEXT: v_movreld_b32_e32 v0, 5
				; SI-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:16
				; SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0
				; SI-NEXT: buffer_store_dwordx3 v[8:10], off, s[0:3], 0 offset:32
				; SI-NEXT: s_endpgm
				;
				; VI-LABEL: dynamic_insertelement_v11i32:
				; VI: ; %bb.0:
				; VI-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x40
				; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
				; VI-NEXT: s_mov_b32 s3, 0x1100f000
				; VI-NEXT: s_mov_b32 s2, -1
				; VI-NEXT: s_waitcnt lgkmcnt(0)
				; VI-NEXT: v_mov_b32_e32 v0, s8
				; VI-NEXT: v_mov_b32_e32 v1, s9
				; VI-NEXT: v_mov_b32_e32 v2, s10
				; VI-NEXT: v_mov_b32_e32 v3, s11
				; VI-NEXT: s_load_dwordx4 s[8:11], s[4:5], 0x60
				; VI-NEXT: s_load_dword s4, s[4:5], 0x80
				; VI-NEXT: v_mov_b32_e32 v4, s12
				; VI-NEXT: v_mov_b32_e32 v5, s13
				; VI-NEXT: v_mov_b32_e32 v6, s14
				; VI-NEXT: v_mov_b32_e32 v7, s15
				; VI-NEXT: s_waitcnt lgkmcnt(0)
				; VI-NEXT: v_mov_b32_e32 v8, s8
				; VI-NEXT: v_mov_b32_e32 v9, s9
				; VI-NEXT: v_mov_b32_e32 v10, s10
				; VI-NEXT: s_mov_b32 m0, s4
				; VI-NEXT: v_movreld_b32_e32 v0, 5
				; VI-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:16
				; VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0
				; VI-NEXT: buffer_store_dwordx3 v[8:10], off, s[0:3], 0 offset:32
				; VI-NEXT: s_endpgm
				%vecins = insertelement <11 x i32> %a, i32 5, i32 %b
				store <11 x i32> %vecins, <11 x i32> addrspace(1)* %out, align 32
				ret void
				}

				define amdgpu_kernel void @dynamic_insertelement_v12i32(<12 x i32> addrspace(1)* %out, <12 x i32> %a, i32 %b) nounwind {
				; SI-LABEL: dynamic_insertelement_v12i32:
				; SI: ; %bb.0:
				; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
				; SI-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x10
				; SI-NEXT: s_load_dwordx4 s[16:19], s[4:5], 0x18
				; SI-NEXT: s_load_dword s4, s[4:5], 0x20
				; SI-NEXT: s_mov_b32 s3, 0x100f000
				; SI-NEXT: s_mov_b32 s2, -1
				; SI-NEXT: s_waitcnt lgkmcnt(0)
				; SI-NEXT: v_mov_b32_e32 v0, s8
				; SI-NEXT: v_mov_b32_e32 v1, s9
				; SI-NEXT: v_mov_b32_e32 v2, s10
				; SI-NEXT: v_mov_b32_e32 v3, s11
				; SI-NEXT: v_mov_b32_e32 v4, s12
				; SI-NEXT: v_mov_b32_e32 v5, s13
				; SI-NEXT: v_mov_b32_e32 v6, s14
				; SI-NEXT: v_mov_b32_e32 v7, s15
				; SI-NEXT: v_mov_b32_e32 v8, s16
				; SI-NEXT: v_mov_b32_e32 v9, s17
				; SI-NEXT: v_mov_b32_e32 v10, s18
				; SI-NEXT: v_mov_b32_e32 v11, s19
				; SI-NEXT: s_mov_b32 m0, s4
				; SI-NEXT: v_movreld_b32_e32 v0, 5
				; SI-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:16
				; SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0
				; SI-NEXT: buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:32
				; SI-NEXT: s_endpgm
				;
				; VI-LABEL: dynamic_insertelement_v12i32:
				; VI: ; %bb.0:
				; VI-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x40
				; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
				; VI-NEXT: s_mov_b32 s3, 0x1100f000
				; VI-NEXT: s_mov_b32 s2, -1
				; VI-NEXT: s_waitcnt lgkmcnt(0)
				; VI-NEXT: v_mov_b32_e32 v0, s8
				; VI-NEXT: v_mov_b32_e32 v1, s9
				; VI-NEXT: v_mov_b32_e32 v2, s10
				; VI-NEXT: v_mov_b32_e32 v3, s11
				; VI-NEXT: s_load_dwordx4 s[8:11], s[4:5], 0x60
				; VI-NEXT: s_load_dword s4, s[4:5], 0x80
				; VI-NEXT: v_mov_b32_e32 v4, s12
				; VI-NEXT: v_mov_b32_e32 v5, s13
				; VI-NEXT: v_mov_b32_e32 v6, s14
				; VI-NEXT: v_mov_b32_e32 v7, s15
				; VI-NEXT: s_waitcnt lgkmcnt(0)
				; VI-NEXT: v_mov_b32_e32 v8, s8
				; VI-NEXT: v_mov_b32_e32 v9, s9
				; VI-NEXT: v_mov_b32_e32 v10, s10
				; VI-NEXT: v_mov_b32_e32 v11, s11
				; VI-NEXT: s_mov_b32 m0, s4
				; VI-NEXT: v_movreld_b32_e32 v0, 5
				; VI-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:16
				; VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0
				; VI-NEXT: buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:32
				; VI-NEXT: s_endpgm
				%vecins = insertelement <12 x i32> %a, i32 5, i32 %b
				store <12 x i32> %vecins, <12 x i32> addrspace(1)* %out, align 32
				ret void
				}

	define amdgpu_kernel void @dynamic_insertelement_v16i32(<16 x i32> addrspace(1)* %out, <16 x i32> %a, i32 %b) nounwind {			define amdgpu_kernel void @dynamic_insertelement_v16i32(<16 x i32> addrspace(1)* %out, <16 x i32> %a, i32 %b) nounwind {
	; SI-LABEL: dynamic_insertelement_v16i32:			; SI-LABEL: dynamic_insertelement_v16i32:
	; SI: ; %bb.0:			; SI: ; %bb.0:
	; SI-NEXT: s_load_dwordx16 s[8:23], s[4:5], 0x10			; SI-NEXT: s_load_dwordx16 s[8:23], s[4:5], 0x10
	; SI-NEXT: s_load_dword s6, s[4:5], 0x20			; SI-NEXT: s_load_dword s6, s[4:5], 0x20
	; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0			; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
	; SI-NEXT: s_mov_b32 s3, 0x100f000			; SI-NEXT: s_mov_b32 s3, 0x100f000
	; SI-NEXT: s_mov_b32 s2, -1			; SI-NEXT: s_mov_b32 s2, -1
	▲ Show 20 Lines • Show All 536 Lines • ▼ Show 20 Lines
	; the compiler doesn't crash.			; the compiler doesn't crash.
	define amdgpu_kernel void @insert_split_bb(<2 x i32> addrspace(1)* %out, i32 addrspace(1)* %in, i32 %a, i32 %b) {			define amdgpu_kernel void @insert_split_bb(<2 x i32> addrspace(1)* %out, i32 addrspace(1)* %in, i32 %a, i32 %b) {
	; SI-LABEL: insert_split_bb:			; SI-LABEL: insert_split_bb:
	; SI: ; %bb.0: ; %entry			; SI: ; %bb.0: ; %entry
	; SI-NEXT: s_load_dword s6, s[4:5], 0x4			; SI-NEXT: s_load_dword s6, s[4:5], 0x4
	; SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0			; SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
	; SI-NEXT: s_waitcnt lgkmcnt(0)			; SI-NEXT: s_waitcnt lgkmcnt(0)
	; SI-NEXT: s_cmp_lg_u32 s6, 0			; SI-NEXT: s_cmp_lg_u32 s6, 0
	; SI-NEXT: s_cbranch_scc0 .LBB30_4			; SI-NEXT: s_cbranch_scc0 .LBB42_4
	; SI-NEXT: ; %bb.1: ; %else			; SI-NEXT: ; %bb.1: ; %else
	; SI-NEXT: s_load_dword s7, s[2:3], 0x1			; SI-NEXT: s_load_dword s7, s[2:3], 0x1
	; SI-NEXT: s_mov_b64 s[4:5], 0			; SI-NEXT: s_mov_b64 s[4:5], 0
	; SI-NEXT: s_andn2_b64 vcc, exec, s[4:5]			; SI-NEXT: s_andn2_b64 vcc, exec, s[4:5]
	; SI-NEXT: s_waitcnt lgkmcnt(0)			; SI-NEXT: s_waitcnt lgkmcnt(0)
	; SI-NEXT: s_mov_b64 vcc, vcc			; SI-NEXT: s_mov_b64 vcc, vcc
	; SI-NEXT: s_cbranch_vccnz .LBB30_3			; SI-NEXT: s_cbranch_vccnz .LBB42_3
	; SI-NEXT: .LBB30_2: ; %if			; SI-NEXT: .LBB42_2: ; %if
	; SI-NEXT: s_load_dword s7, s[2:3], 0x0			; SI-NEXT: s_load_dword s7, s[2:3], 0x0
	; SI-NEXT: .LBB30_3: ; %endif			; SI-NEXT: .LBB42_3: ; %endif
	; SI-NEXT: s_waitcnt lgkmcnt(0)			; SI-NEXT: s_waitcnt lgkmcnt(0)
	; SI-NEXT: v_mov_b32_e32 v0, s6			; SI-NEXT: v_mov_b32_e32 v0, s6
	; SI-NEXT: s_mov_b32 s3, 0x100f000			; SI-NEXT: s_mov_b32 s3, 0x100f000
	; SI-NEXT: s_mov_b32 s2, -1			; SI-NEXT: s_mov_b32 s2, -1
	; SI-NEXT: v_mov_b32_e32 v1, s7			; SI-NEXT: v_mov_b32_e32 v1, s7
	; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0			; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
	; SI-NEXT: s_endpgm			; SI-NEXT: s_endpgm
	; SI-NEXT: .LBB30_4:			; SI-NEXT: .LBB42_4:
	; SI-NEXT: s_branch .LBB30_2			; SI-NEXT: s_branch .LBB42_2
	;			;
	; VI-LABEL: insert_split_bb:			; VI-LABEL: insert_split_bb:
	; VI: ; %bb.0: ; %entry			; VI: ; %bb.0: ; %entry
	; VI-NEXT: s_load_dword s6, s[4:5], 0x10			; VI-NEXT: s_load_dword s6, s[4:5], 0x10
	; VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0			; VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
	; VI-NEXT: s_waitcnt lgkmcnt(0)			; VI-NEXT: s_waitcnt lgkmcnt(0)
	; VI-NEXT: s_cmp_lg_u32 s6, 0			; VI-NEXT: s_cmp_lg_u32 s6, 0
	; VI-NEXT: s_cbranch_scc0 .LBB30_4			; VI-NEXT: s_cbranch_scc0 .LBB42_4
	; VI-NEXT: ; %bb.1: ; %else			; VI-NEXT: ; %bb.1: ; %else
	; VI-NEXT: s_load_dword s7, s[2:3], 0x4			; VI-NEXT: s_load_dword s7, s[2:3], 0x4
	; VI-NEXT: s_cbranch_execnz .LBB30_3			; VI-NEXT: s_cbranch_execnz .LBB42_3
	; VI-NEXT: .LBB30_2: ; %if			; VI-NEXT: .LBB42_2: ; %if
	; VI-NEXT: s_waitcnt lgkmcnt(0)			; VI-NEXT: s_waitcnt lgkmcnt(0)
	; VI-NEXT: s_load_dword s7, s[2:3], 0x0			; VI-NEXT: s_load_dword s7, s[2:3], 0x0
	; VI-NEXT: .LBB30_3: ; %endif			; VI-NEXT: .LBB42_3: ; %endif
	; VI-NEXT: s_waitcnt lgkmcnt(0)			; VI-NEXT: s_waitcnt lgkmcnt(0)
	; VI-NEXT: v_mov_b32_e32 v0, s6			; VI-NEXT: v_mov_b32_e32 v0, s6
	; VI-NEXT: s_mov_b32 s3, 0x1100f000			; VI-NEXT: s_mov_b32 s3, 0x1100f000
	; VI-NEXT: s_mov_b32 s2, -1			; VI-NEXT: s_mov_b32 s2, -1
	; VI-NEXT: v_mov_b32_e32 v1, s7			; VI-NEXT: v_mov_b32_e32 v1, s7
	; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0			; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
	; VI-NEXT: s_endpgm			; VI-NEXT: s_endpgm
	; VI-NEXT: .LBB30_4:			; VI-NEXT: .LBB42_4:
	; VI-NEXT: s_branch .LBB30_2			; VI-NEXT: s_branch .LBB42_2
	entry:			entry:
	%0 = insertelement <2 x i32> undef, i32 %a, i32 0			%0 = insertelement <2 x i32> undef, i32 %a, i32 0
	%1 = icmp eq i32 %a, 0			%1 = icmp eq i32 %a, 0
	br i1 %1, label %if, label %else			br i1 %1, label %if, label %else

	if:			if:
	%2 = load i32, i32 addrspace(1)* %in			%2 = load i32, i32 addrspace(1)* %in
	%3 = insertelement <2 x i32> %0, i32 %2, i32 1			%3 = insertelement <2 x i32> %0, i32 %2, i32 1
	▲ Show 20 Lines • Show All 319 Lines • Show Last 20 Lines

llvm/test/CodeGen/AMDGPU/ipra-regmask.ll

				; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
	; RUN: llc -mtriple=amdgcn-amd-amdhsa -enable-ipra -print-regusage -o /dev/null 2>&1 < %s \| FileCheck %s			; RUN: llc -mtriple=amdgcn-amd-amdhsa -enable-ipra -print-regusage -o /dev/null 2>&1 < %s \| FileCheck %s
	; Make sure the expected regmask is generated for sub/superregisters.			; Make sure the expected regmask is generated for sub/superregisters.

	; CHECK-DAG: csr Clobbered Registers: $vgpr0 $vgpr0_hi16 $vgpr0_lo16 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 $vgpr0_vgpr1_vgpr2_vgpr3 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 $vgpr0_vgpr1 $vgpr0_vgpr1_vgpr2 {{$}}			; CHECK-DAG: csr Clobbered Registers: $vgpr0 $vgpr0_hi16 $vgpr0_lo16 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 $vgpr0_vgpr1_vgpr2_vgpr3 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 $vgpr0_vgpr1 $vgpr0_vgpr1_vgpr2 {{$}}
	define void @csr() #0 {			define void @csr() #0 {
	call void asm sideeffect "", "~{v0},~{v44},~{v45}"() #0			call void asm sideeffect "", "~{v0},~{v44},~{v45}"() #0
	ret void			ret void
	}			}

	; CHECK-DAG: subregs_for_super Clobbered Registers: $vgpr0 $vgpr1 $vgpr0_hi16 $vgpr1_hi16 $vgpr0_lo16 $vgpr1_lo16 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32 $vgpr0_vgpr1_vgpr2_vgpr3 $vgpr1_vgpr2_vgpr3_vgpr4 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16 $vgpr0_vgpr1 $vgpr1_vgpr2 $vgpr0_vgpr1_vgpr2 $vgpr1_vgpr2_vgpr3 {{$}}			; CHECK-DAG: subregs_for_super Clobbered Registers: $vgpr0 $vgpr1 $vgpr0_hi16 $vgpr1_hi16 $vgpr0_lo16 $vgpr1_lo16 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32 $vgpr0_vgpr1_vgpr2_vgpr3 $vgpr1_vgpr2_vgpr3_vgpr4 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 
$vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8 $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10 $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11 $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16 $vgpr0_vgpr1 $vgpr1_vgpr2 $vgpr0_vgpr1_vgpr2 $vgpr1_vgpr2_vgpr3 {{$}}
	define void @subregs_for_super() #0 {			define void @subregs_for_super() #0 {
	call void asm sideeffect "", "~{v0},~{v1}"() #0			call void asm sideeffect "", "~{v0},~{v1}"() #0
	ret void			ret void
	}			}

	; CHECK-DAG: clobbered_reg_with_sub Clobbered Registers: $vgpr0 $vgpr1 $vgpr0_hi16 $vgpr1_hi16 $vgpr0_lo16 $vgpr1_lo16 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32 $vgpr0_vgpr1_vgpr2_vgpr3 $vgpr1_vgpr2_vgpr3_vgpr4 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16 $vgpr0_vgpr1 $vgpr1_vgpr2 $vgpr0_vgpr1_vgpr2 $vgpr1_vgpr2_vgpr3 {{$}}			; CHECK-DAG: clobbered_reg_with_sub Clobbered Registers: $vgpr0 $vgpr1 $vgpr0_hi16 $vgpr1_hi16 $vgpr0_lo16 $vgpr1_lo16 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32 $vgpr0_vgpr1_vgpr2_vgpr3 $vgpr1_vgpr2_vgpr3_vgpr4 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 
$vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8 $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10 $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11 $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16 $vgpr0_vgpr1 $vgpr1_vgpr2 $vgpr0_vgpr1_vgpr2 $vgpr1_vgpr2_vgpr3 {{$}}
	define void @clobbered_reg_with_sub() #0 {			define void @clobbered_reg_with_sub() #0 {
	call void asm sideeffect "", "~{v[0:1]}"() #0			call void asm sideeffect "", "~{v[0:1]}"() #0
	ret void			ret void
	}			}

	; CHECK-DAG: nothing Clobbered Registers: {{$}}			; CHECK-DAG: nothing Clobbered Registers: {{$}}
	define void @nothing() #0 {			define void @nothing() #0 {
	ret void			ret void
	Show All 14 Lines
	@llvm.used = appending global [6 x i8] [i8 bitcast (void ()* @csr to i8*),			@llvm.used = appending global [6 x i8] [i8 bitcast (void ()* @csr to i8*),
	i8* bitcast (void ()* @subregs_for_super to i8*),			i8* bitcast (void ()* @subregs_for_super to i8*),
	i8* bitcast (void ()* @clobbered_reg_with_sub to i8*),			i8* bitcast (void ()* @clobbered_reg_with_sub to i8*),
	i8* bitcast (void ()* @nothing to i8*),			i8* bitcast (void ()* @nothing to i8*),
	i8* bitcast (void ()* @special_regs to i8*),			i8* bitcast (void ()* @special_regs to i8*),
	i8* bitcast (void ()* @vcc to i8*)]			i8* bitcast (void ()* @vcc to i8*)]

	attributes #0 = { nounwind }			attributes #0 = { nounwind }
				;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
				; CHECK: {{.*}}

llvm/test/CodeGen/AMDGPU/kernel-args.ll

	Show First 20 Lines • Show All 2,280 Lines • ▼ Show 20 Lines
	; SI-NEXT: s_waitcnt expcnt(0)			; SI-NEXT: s_waitcnt expcnt(0)
	; SI-NEXT: v_mov_b32_e32 v0, s0			; SI-NEXT: v_mov_b32_e32 v0, s0
	; SI-NEXT: v_mov_b32_e32 v1, s1			; SI-NEXT: v_mov_b32_e32 v1, s1
	; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[12:15], 0 offset:32			; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[12:15], 0 offset:32
	; SI-NEXT: s_endpgm			; SI-NEXT: s_endpgm
	;			;
	; VI-LABEL: v5i64_arg:			; VI-LABEL: v5i64_arg:
	; VI: ; %bb.0: ; %entry			; VI: ; %bb.0: ; %entry
	; VI-NEXT: s_load_dwordx8 s[4:11], s[0:1], 0x64			; VI-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0x24
	; VI-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24			; VI-NEXT: s_load_dwordx2 s[10:11], s[0:1], 0x84
	; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x84			; VI-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x64
	; VI-NEXT: s_waitcnt lgkmcnt(0)			; VI-NEXT: s_waitcnt lgkmcnt(0)
	; VI-NEXT: v_mov_b32_e32 v0, s8			; VI-NEXT: s_add_u32 s12, s8, 32
	; VI-NEXT: s_add_u32 s8, s2, 16			; VI-NEXT: v_mov_b32_e32 v1, s10
	; VI-NEXT: v_mov_b32_e32 v1, s9			; VI-NEXT: s_addc_u32 s13, s9, 0
	; VI-NEXT: s_addc_u32 s9, s3, 0			; VI-NEXT: v_mov_b32_e32 v3, s12
	; VI-NEXT: v_mov_b32_e32 v4, s8			; VI-NEXT: v_mov_b32_e32 v2, s11
	; VI-NEXT: v_mov_b32_e32 v2, s10
	; VI-NEXT: v_mov_b32_e32 v3, s11
	; VI-NEXT: v_mov_b32_e32 v5, s9
	; VI-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
	; VI-NEXT: v_mov_b32_e32 v5, s3
	; VI-NEXT: v_mov_b32_e32 v0, s4			; VI-NEXT: v_mov_b32_e32 v0, s4
				; VI-NEXT: v_mov_b32_e32 v4, s13
				; VI-NEXT: s_add_u32 s4, s8, 16
				; VI-NEXT: flat_store_dwordx2 v[3:4], v[1:2]
	; VI-NEXT: v_mov_b32_e32 v1, s5			; VI-NEXT: v_mov_b32_e32 v1, s5
				; VI-NEXT: s_addc_u32 s5, s9, 0
				; VI-NEXT: v_mov_b32_e32 v4, s4
	; VI-NEXT: v_mov_b32_e32 v2, s6			; VI-NEXT: v_mov_b32_e32 v2, s6
	; VI-NEXT: v_mov_b32_e32 v3, s7			; VI-NEXT: v_mov_b32_e32 v3, s7
	; VI-NEXT: v_mov_b32_e32 v4, s2			; VI-NEXT: v_mov_b32_e32 v5, s5
	; VI-NEXT: s_add_u32 s2, s2, 32
	; VI-NEXT: flat_store_dwordx4 v[4:5], v[0:3]			; VI-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
	; VI-NEXT: s_addc_u32 s3, s3, 0			; VI-NEXT: v_mov_b32_e32 v4, s8
	; VI-NEXT: v_mov_b32_e32 v2, s2
	; VI-NEXT: v_mov_b32_e32 v0, s0			; VI-NEXT: v_mov_b32_e32 v0, s0
	; VI-NEXT: v_mov_b32_e32 v1, s1			; VI-NEXT: v_mov_b32_e32 v1, s1
				; VI-NEXT: v_mov_b32_e32 v2, s2
	; VI-NEXT: v_mov_b32_e32 v3, s3			; VI-NEXT: v_mov_b32_e32 v3, s3
	; VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1]			; VI-NEXT: v_mov_b32_e32 v5, s9
				; VI-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
	; VI-NEXT: s_endpgm			; VI-NEXT: s_endpgm
	;			;
	; GFX9-LABEL: v5i64_arg:			; GFX9-LABEL: v5i64_arg:
	; GFX9: ; %bb.0: ; %entry			; GFX9: ; %bb.0: ; %entry
				; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x60
	; GFX9-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x40			; GFX9-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x40
	; GFX9-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x0			; GFX9-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0
	; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x60
	; GFX9-NEXT: v_mov_b32_e32 v4, 0			; GFX9-NEXT: v_mov_b32_e32 v4, 0
	; GFX9-NEXT: s_waitcnt lgkmcnt(0)			; GFX9-NEXT: s_waitcnt lgkmcnt(0)
				; GFX9-NEXT: v_mov_b32_e32 v2, s1
				; GFX9-NEXT: v_mov_b32_e32 v1, s0
	; GFX9-NEXT: v_mov_b32_e32 v0, s12			; GFX9-NEXT: v_mov_b32_e32 v0, s12
				; GFX9-NEXT: global_store_dwordx2 v4, v[1:2], s[2:3] offset:32
	; GFX9-NEXT: v_mov_b32_e32 v1, s13			; GFX9-NEXT: v_mov_b32_e32 v1, s13
	; GFX9-NEXT: v_mov_b32_e32 v2, s14			; GFX9-NEXT: v_mov_b32_e32 v2, s14
	; GFX9-NEXT: v_mov_b32_e32 v3, s15			; GFX9-NEXT: v_mov_b32_e32 v3, s15
	; GFX9-NEXT: global_store_dwordx4 v4, v[0:3], s[6:7] offset:16			; GFX9-NEXT: global_store_dwordx4 v4, v[0:3], s[2:3] offset:16
	; GFX9-NEXT: s_nop 0			; GFX9-NEXT: s_nop 0
	; GFX9-NEXT: v_mov_b32_e32 v0, s8			; GFX9-NEXT: v_mov_b32_e32 v0, s8
	; GFX9-NEXT: v_mov_b32_e32 v1, s9			; GFX9-NEXT: v_mov_b32_e32 v1, s9
	; GFX9-NEXT: v_mov_b32_e32 v2, s10			; GFX9-NEXT: v_mov_b32_e32 v2, s10
	; GFX9-NEXT: v_mov_b32_e32 v3, s11			; GFX9-NEXT: v_mov_b32_e32 v3, s11
	; GFX9-NEXT: global_store_dwordx4 v4, v[0:3], s[6:7]			; GFX9-NEXT: global_store_dwordx4 v4, v[0:3], s[2:3]
	; GFX9-NEXT: s_nop 0
	; GFX9-NEXT: v_mov_b32_e32 v0, s0
	; GFX9-NEXT: v_mov_b32_e32 v1, s1
	; GFX9-NEXT: global_store_dwordx2 v4, v[0:1], s[6:7] offset:32
	; GFX9-NEXT: s_endpgm			; GFX9-NEXT: s_endpgm
	;			;
	; EG-LABEL: v5i64_arg:			; EG-LABEL: v5i64_arg:
	; EG: ; %bb.0: ; %entry			; EG: ; %bb.0: ; %entry
	; EG-NEXT: ALU 18, @6, KC0[CB0:0-32], KC1[]			; EG-NEXT: ALU 18, @6, KC0[CB0:0-32], KC1[]
	; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T5.XY, T4.X, 0			; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T5.XY, T4.X, 0
	; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T1.XYZW, T3.X, 0			; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T1.XYZW, T3.X, 0
	; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XYZW, T2.X, 1			; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XYZW, T2.X, 1
	▲ Show 20 Lines • Show All 76 Lines • ▼ Show 20 Lines
	; SI-NEXT: s_waitcnt expcnt(0)			; SI-NEXT: s_waitcnt expcnt(0)
	; SI-NEXT: v_mov_b32_e32 v0, s0			; SI-NEXT: v_mov_b32_e32 v0, s0
	; SI-NEXT: v_mov_b32_e32 v1, s1			; SI-NEXT: v_mov_b32_e32 v1, s1
	; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[12:15], 0 offset:32			; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[12:15], 0 offset:32
	; SI-NEXT: s_endpgm			; SI-NEXT: s_endpgm
	;			;
	; VI-LABEL: v5f64_arg:			; VI-LABEL: v5f64_arg:
	; VI: ; %bb.0: ; %entry			; VI: ; %bb.0: ; %entry
	; VI-NEXT: s_load_dwordx8 s[4:11], s[0:1], 0x64			; VI-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0x24
	; VI-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24			; VI-NEXT: s_load_dwordx2 s[10:11], s[0:1], 0x84
	; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x84			; VI-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x64
	; VI-NEXT: s_waitcnt lgkmcnt(0)			; VI-NEXT: s_waitcnt lgkmcnt(0)
	; VI-NEXT: v_mov_b32_e32 v0, s8			; VI-NEXT: s_add_u32 s12, s8, 32
	; VI-NEXT: s_add_u32 s8, s2, 16			; VI-NEXT: v_mov_b32_e32 v1, s10
	; VI-NEXT: v_mov_b32_e32 v1, s9			; VI-NEXT: s_addc_u32 s13, s9, 0
	; VI-NEXT: s_addc_u32 s9, s3, 0			; VI-NEXT: v_mov_b32_e32 v3, s12
	; VI-NEXT: v_mov_b32_e32 v4, s8			; VI-NEXT: v_mov_b32_e32 v2, s11
	; VI-NEXT: v_mov_b32_e32 v2, s10
	; VI-NEXT: v_mov_b32_e32 v3, s11
	; VI-NEXT: v_mov_b32_e32 v5, s9
	; VI-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
	; VI-NEXT: v_mov_b32_e32 v5, s3
	; VI-NEXT: v_mov_b32_e32 v0, s4			; VI-NEXT: v_mov_b32_e32 v0, s4
				; VI-NEXT: v_mov_b32_e32 v4, s13
				; VI-NEXT: s_add_u32 s4, s8, 16
				; VI-NEXT: flat_store_dwordx2 v[3:4], v[1:2]
	; VI-NEXT: v_mov_b32_e32 v1, s5			; VI-NEXT: v_mov_b32_e32 v1, s5
				; VI-NEXT: s_addc_u32 s5, s9, 0
				; VI-NEXT: v_mov_b32_e32 v4, s4
	; VI-NEXT: v_mov_b32_e32 v2, s6			; VI-NEXT: v_mov_b32_e32 v2, s6
	; VI-NEXT: v_mov_b32_e32 v3, s7			; VI-NEXT: v_mov_b32_e32 v3, s7
	; VI-NEXT: v_mov_b32_e32 v4, s2			; VI-NEXT: v_mov_b32_e32 v5, s5
	; VI-NEXT: s_add_u32 s2, s2, 32
	; VI-NEXT: flat_store_dwordx4 v[4:5], v[0:3]			; VI-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
	; VI-NEXT: s_addc_u32 s3, s3, 0			; VI-NEXT: v_mov_b32_e32 v4, s8
	; VI-NEXT: v_mov_b32_e32 v2, s2
	; VI-NEXT: v_mov_b32_e32 v0, s0			; VI-NEXT: v_mov_b32_e32 v0, s0
	; VI-NEXT: v_mov_b32_e32 v1, s1			; VI-NEXT: v_mov_b32_e32 v1, s1
				; VI-NEXT: v_mov_b32_e32 v2, s2
	; VI-NEXT: v_mov_b32_e32 v3, s3			; VI-NEXT: v_mov_b32_e32 v3, s3
	; VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1]			; VI-NEXT: v_mov_b32_e32 v5, s9
				; VI-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
	; VI-NEXT: s_endpgm			; VI-NEXT: s_endpgm
	;			;
	; GFX9-LABEL: v5f64_arg:			; GFX9-LABEL: v5f64_arg:
	; GFX9: ; %bb.0: ; %entry			; GFX9: ; %bb.0: ; %entry
				; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x60
	; GFX9-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x40			; GFX9-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x40
	; GFX9-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x0			; GFX9-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0
	; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x60
	; GFX9-NEXT: v_mov_b32_e32 v4, 0			; GFX9-NEXT: v_mov_b32_e32 v4, 0
	; GFX9-NEXT: s_waitcnt lgkmcnt(0)			; GFX9-NEXT: s_waitcnt lgkmcnt(0)
				; GFX9-NEXT: v_mov_b32_e32 v2, s1
				; GFX9-NEXT: v_mov_b32_e32 v1, s0
	; GFX9-NEXT: v_mov_b32_e32 v0, s12			; GFX9-NEXT: v_mov_b32_e32 v0, s12
				; GFX9-NEXT: global_store_dwordx2 v4, v[1:2], s[2:3] offset:32
	; GFX9-NEXT: v_mov_b32_e32 v1, s13			; GFX9-NEXT: v_mov_b32_e32 v1, s13
	; GFX9-NEXT: v_mov_b32_e32 v2, s14			; GFX9-NEXT: v_mov_b32_e32 v2, s14
	; GFX9-NEXT: v_mov_b32_e32 v3, s15			; GFX9-NEXT: v_mov_b32_e32 v3, s15
	; GFX9-NEXT: global_store_dwordx4 v4, v[0:3], s[6:7] offset:16			; GFX9-NEXT: global_store_dwordx4 v4, v[0:3], s[2:3] offset:16
	; GFX9-NEXT: s_nop 0			; GFX9-NEXT: s_nop 0
	; GFX9-NEXT: v_mov_b32_e32 v0, s8			; GFX9-NEXT: v_mov_b32_e32 v0, s8
	; GFX9-NEXT: v_mov_b32_e32 v1, s9			; GFX9-NEXT: v_mov_b32_e32 v1, s9
	; GFX9-NEXT: v_mov_b32_e32 v2, s10			; GFX9-NEXT: v_mov_b32_e32 v2, s10
	; GFX9-NEXT: v_mov_b32_e32 v3, s11			; GFX9-NEXT: v_mov_b32_e32 v3, s11
	; GFX9-NEXT: global_store_dwordx4 v4, v[0:3], s[6:7]			; GFX9-NEXT: global_store_dwordx4 v4, v[0:3], s[2:3]
	; GFX9-NEXT: s_nop 0
	; GFX9-NEXT: v_mov_b32_e32 v0, s0
	; GFX9-NEXT: v_mov_b32_e32 v1, s1
	; GFX9-NEXT: global_store_dwordx2 v4, v[0:1], s[6:7] offset:32
	; GFX9-NEXT: s_endpgm			; GFX9-NEXT: s_endpgm
	;			;
	; EG-LABEL: v5f64_arg:			; EG-LABEL: v5f64_arg:
	; EG: ; %bb.0: ; %entry			; EG: ; %bb.0: ; %entry
	; EG-NEXT: ALU 18, @6, KC0[CB0:0-32], KC1[]			; EG-NEXT: ALU 18, @6, KC0[CB0:0-32], KC1[]
	; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T5.XY, T4.X, 0			; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T5.XY, T4.X, 0
	; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T1.XYZW, T3.X, 0			; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T1.XYZW, T3.X, 0
	; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XYZW, T2.X, 1			; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XYZW, T2.X, 1
	▲ Show 20 Lines • Show All 3,867 Lines • Show Last 20 Lines

llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.nsa.ll

	Show All 26 Lines
	; NSA: image_sample v[0:3], [v1, v2, v0],			; NSA: image_sample v[0:3], [v1, v2, v0],
	define amdgpu_ps <4 x float> @sample_3d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %r, float %s, float %t) {			define amdgpu_ps <4 x float> @sample_3d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %r, float %s, float %t) {
	main_body:			main_body:
	%v = call <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f32(i32 15, float %s, float %t, float %r, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)			%v = call <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f32(i32 15, float %s, float %t, float %r, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
	ret <4 x float> %v			ret <4 x float> %v
	}			}

	; GCN-LABEL: {{^}}sample_d_3d:			; GCN-LABEL: {{^}}sample_d_3d:
	; GFX1010-NSA: image_sample_d v[0:3], v[7:22],			; GFX1010-NSA: image_sample_d v[0:3], v[7:15],
	; GFX1030-NSA: image_sample_d v[0:3], [v3, v8, v7, v5, v4, v6, v0, v2, v1],			; GFX1030-NSA: image_sample_d v[0:3], [v3, v8, v7, v5, v4, v6, v0, v2, v1],
	; GFX11-NSA: image_sample_d v[0:3], v[7:22],			; GFX11-NSA: image_sample_d v[0:3], v[7:15],
	define amdgpu_ps <4 x float> @sample_d_3d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %r, float %t, float %dsdh, float %dtdv, float %dsdv, float %drdv, float %drdh, float %dtdh) {			define amdgpu_ps <4 x float> @sample_d_3d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %r, float %t, float %dsdh, float %dtdv, float %dsdv, float %drdv, float %drdh, float %dtdh) {
	main_body:			main_body:
	%v = call <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f32(i32 15, float %dsdh, float %dtdh, float %drdh, float %dsdv, float %dtdv, float %drdv, float %s, float %t, float %r, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)			%v = call <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f32(i32 15, float %dsdh, float %dtdh, float %drdh, float %dsdv, float %dtdv, float %drdv, float %s, float %t, float %r, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
	ret <4 x float> %v			ret <4 x float> %v
	}			}

	; GCN-LABEL: {{^}}sample_contig_nsa:			; GCN-LABEL: {{^}}sample_contig_nsa:
	; GFX10-NONSA: image_sample_c_l v5, v[0:4],			; GFX10-NONSA: image_sample_c_l v5, v[0:4],
	▲ Show 20 Lines • Show All 120 Lines • Show Last 20 Lines

llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.dim.ll

	Show First 20 Lines • Show All 1,562 Lines • ▼ Show 20 Lines
	main_body:			main_body:
	%v = call <4 x float> @llvm.amdgcn.image.sample.c.lz.2d.v4f32.f32(i32 15, float %zcompare, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)			%v = call <4 x float> @llvm.amdgcn.image.sample.c.lz.2d.v4f32.f32(i32 15, float %zcompare, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
	ret <4 x float> %v			ret <4 x float> %v
	}			}

	define amdgpu_ps float @sample_c_d_o_2darray_V1(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %slice) {			define amdgpu_ps float @sample_c_d_o_2darray_V1(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %slice) {
	; VERDE-LABEL: sample_c_d_o_2darray_V1:			; VERDE-LABEL: sample_c_d_o_2darray_V1:
	; VERDE: ; %bb.0: ; %main_body			; VERDE: ; %bb.0: ; %main_body
	; VERDE-NEXT: image_sample_c_d_o v0, v[0:15], s[0:7], s[8:11] dmask:0x4 da			; VERDE-NEXT: image_sample_c_d_o v0, v[0:8], s[0:7], s[8:11] dmask:0x4 da
	; VERDE-NEXT: s_waitcnt vmcnt(0)			; VERDE-NEXT: s_waitcnt vmcnt(0)
	; VERDE-NEXT: ; return to shader part epilog			; VERDE-NEXT: ; return to shader part epilog
	;			;
	; GFX6789-LABEL: sample_c_d_o_2darray_V1:			; GFX6789-LABEL: sample_c_d_o_2darray_V1:
	; GFX6789: ; %bb.0: ; %main_body			; GFX6789: ; %bb.0: ; %main_body
	; GFX6789-NEXT: image_sample_c_d_o v0, v[0:15], s[0:7], s[8:11] dmask:0x4 da			; GFX6789-NEXT: image_sample_c_d_o v0, v[0:8], s[0:7], s[8:11] dmask:0x4 da
	; GFX6789-NEXT: s_waitcnt vmcnt(0)			; GFX6789-NEXT: s_waitcnt vmcnt(0)
	; GFX6789-NEXT: ; return to shader part epilog			; GFX6789-NEXT: ; return to shader part epilog
	;			;
	; GFX10PLUS-LABEL: sample_c_d_o_2darray_V1:			; GFX10PLUS-LABEL: sample_c_d_o_2darray_V1:
	; GFX10PLUS: ; %bb.0: ; %main_body			; GFX10PLUS: ; %bb.0: ; %main_body
	; GFX10PLUS-NEXT: image_sample_c_d_o v0, v[0:15], s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY			; GFX10PLUS-NEXT: image_sample_c_d_o v0, v[0:8], s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY
	; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)			; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
	; GFX10PLUS-NEXT: ; return to shader part epilog			; GFX10PLUS-NEXT: ; return to shader part epilog
	main_body:			main_body:
	%v = call float @llvm.amdgcn.image.sample.c.d.o.2darray.f32.f32.f32(i32 4, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)			%v = call float @llvm.amdgcn.image.sample.c.d.o.2darray.f32.f32.f32(i32 4, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
	ret float %v			ret float %v
	}			}

	define amdgpu_ps float @sample_c_d_o_2darray_V1_tfe(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %slice, i32 addrspace(1)* inreg %out) {			define amdgpu_ps float @sample_c_d_o_2darray_V1_tfe(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %slice, i32 addrspace(1)* inreg %out) {
	; VERDE-LABEL: sample_c_d_o_2darray_V1_tfe:			; VERDE-LABEL: sample_c_d_o_2darray_V1_tfe:
	; VERDE: ; %bb.0: ; %main_body			; VERDE: ; %bb.0: ; %main_body
	; VERDE-NEXT: v_mov_b32_e32 v9, 0			; VERDE-NEXT: v_mov_b32_e32 v9, 0
	; VERDE-NEXT: v_mov_b32_e32 v10, v9			; VERDE-NEXT: v_mov_b32_e32 v10, v9
	; VERDE-NEXT: image_sample_c_d_o v[9:10], v[0:15], s[0:7], s[8:11] dmask:0x4 tfe da			; VERDE-NEXT: image_sample_c_d_o v[9:10], v[0:8], s[0:7], s[8:11] dmask:0x4 tfe da
	; VERDE-NEXT: s_mov_b32 s15, 0xf000			; VERDE-NEXT: s_mov_b32 s15, 0xf000
	; VERDE-NEXT: s_mov_b32 s14, -1			; VERDE-NEXT: s_mov_b32 s14, -1
	; VERDE-NEXT: s_waitcnt vmcnt(0)			; VERDE-NEXT: s_waitcnt vmcnt(0)
	; VERDE-NEXT: v_mov_b32_e32 v0, v9			; VERDE-NEXT: v_mov_b32_e32 v0, v9
	; VERDE-NEXT: buffer_store_dword v10, off, s[12:15], 0			; VERDE-NEXT: buffer_store_dword v10, off, s[12:15], 0
	; VERDE-NEXT: s_waitcnt vmcnt(0) expcnt(0)			; VERDE-NEXT: s_waitcnt vmcnt(0) expcnt(0)
	; VERDE-NEXT: ; return to shader part epilog			; VERDE-NEXT: ; return to shader part epilog
	;			;
	; GFX6789-LABEL: sample_c_d_o_2darray_V1_tfe:			; GFX6789-LABEL: sample_c_d_o_2darray_V1_tfe:
	; GFX6789: ; %bb.0: ; %main_body			; GFX6789: ; %bb.0: ; %main_body
	; GFX6789-NEXT: v_mov_b32_e32 v11, 0			; GFX6789-NEXT: v_mov_b32_e32 v11, 0
	; GFX6789-NEXT: v_mov_b32_e32 v12, v11			; GFX6789-NEXT: v_mov_b32_e32 v12, v11
	; GFX6789-NEXT: v_mov_b32_e32 v9, v11			; GFX6789-NEXT: v_mov_b32_e32 v9, v11
	; GFX6789-NEXT: v_mov_b32_e32 v10, v12			; GFX6789-NEXT: v_mov_b32_e32 v10, v12
	; GFX6789-NEXT: image_sample_c_d_o v[9:10], v[0:15], s[0:7], s[8:11] dmask:0x4 tfe da			; GFX6789-NEXT: image_sample_c_d_o v[9:10], v[0:8], s[0:7], s[8:11] dmask:0x4 tfe da
	; GFX6789-NEXT: s_waitcnt vmcnt(0)			; GFX6789-NEXT: s_waitcnt vmcnt(0)
	; GFX6789-NEXT: v_mov_b32_e32 v0, v9			; GFX6789-NEXT: v_mov_b32_e32 v0, v9
	; GFX6789-NEXT: global_store_dword v11, v10, s[12:13]			; GFX6789-NEXT: global_store_dword v11, v10, s[12:13]
	; GFX6789-NEXT: s_waitcnt vmcnt(0)			; GFX6789-NEXT: s_waitcnt vmcnt(0)
	; GFX6789-NEXT: ; return to shader part epilog			; GFX6789-NEXT: ; return to shader part epilog
	;			;
	; GFX10-LABEL: sample_c_d_o_2darray_V1_tfe:			; GFX10-LABEL: sample_c_d_o_2darray_V1_tfe:
	; GFX10: ; %bb.0: ; %main_body			; GFX10: ; %bb.0: ; %main_body
	; GFX10-NEXT: v_mov_b32_e32 v11, 0			; GFX10-NEXT: v_mov_b32_e32 v11, 0
	; GFX10-NEXT: v_mov_b32_e32 v12, v11			; GFX10-NEXT: v_mov_b32_e32 v12, v11
	; GFX10-NEXT: v_mov_b32_e32 v9, v11			; GFX10-NEXT: v_mov_b32_e32 v9, v11
	; GFX10-NEXT: v_mov_b32_e32 v10, v12			; GFX10-NEXT: v_mov_b32_e32 v10, v12
	; GFX10-NEXT: image_sample_c_d_o v[9:10], v[0:15], s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY tfe			; GFX10-NEXT: image_sample_c_d_o v[9:10], v[0:8], s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY tfe
	; GFX10-NEXT: s_waitcnt vmcnt(0)			; GFX10-NEXT: s_waitcnt vmcnt(0)
	; GFX10-NEXT: v_mov_b32_e32 v0, v9			; GFX10-NEXT: v_mov_b32_e32 v0, v9
	; GFX10-NEXT: global_store_dword v11, v10, s[12:13]			; GFX10-NEXT: global_store_dword v11, v10, s[12:13]
	; GFX10-NEXT: s_waitcnt_vscnt null, 0x0			; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
	; GFX10-NEXT: ; return to shader part epilog			; GFX10-NEXT: ; return to shader part epilog
	;			;
	; GFX11-LABEL: sample_c_d_o_2darray_V1_tfe:			; GFX11-LABEL: sample_c_d_o_2darray_V1_tfe:
	; GFX11: ; %bb.0: ; %main_body			; GFX11: ; %bb.0: ; %main_body
	; GFX11-NEXT: v_mov_b32_e32 v11, 0			; GFX11-NEXT: v_mov_b32_e32 v11, 0
	; GFX11-NEXT: v_mov_b32_e32 v12, v11			; GFX11-NEXT: v_mov_b32_e32 v12, v11
	; GFX11-NEXT: v_dual_mov_b32 v9, v11 :: v_dual_mov_b32 v10, v12			; GFX11-NEXT: v_dual_mov_b32 v9, v11 :: v_dual_mov_b32 v10, v12
	; GFX11-NEXT: image_sample_c_d_o v[9:10], v[0:15], s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY tfe			; GFX11-NEXT: image_sample_c_d_o v[9:10], v[0:8], s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY tfe
	; GFX11-NEXT: s_waitcnt vmcnt(0)			; GFX11-NEXT: s_waitcnt vmcnt(0)
	; GFX11-NEXT: v_mov_b32_e32 v0, v9			; GFX11-NEXT: v_mov_b32_e32 v0, v9
	; GFX11-NEXT: global_store_b32 v11, v10, s[12:13]			; GFX11-NEXT: global_store_b32 v11, v10, s[12:13]
	; GFX11-NEXT: s_waitcnt_vscnt null, 0x0			; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
	; GFX11-NEXT: ; return to shader part epilog			; GFX11-NEXT: ; return to shader part epilog
	main_body:			main_body:
	%v = call {float,i32} @llvm.amdgcn.image.sample.c.d.o.2darray.f32i32.f32.f32(i32 4, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 1, i32 0)			%v = call {float,i32} @llvm.amdgcn.image.sample.c.d.o.2darray.f32i32.f32.f32(i32 4, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 1, i32 0)
	%v.vec = extractvalue {float, i32} %v, 0			%v.vec = extractvalue {float, i32} %v, 0
	%v.err = extractvalue {float, i32} %v, 1			%v.err = extractvalue {float, i32} %v, 1
	store i32 %v.err, i32 addrspace(1)* %out, align 4			store i32 %v.err, i32 addrspace(1)* %out, align 4
	ret float %v.vec			ret float %v.vec
	}			}

	define amdgpu_ps <2 x float> @sample_c_d_o_2darray_V2(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %slice) {			define amdgpu_ps <2 x float> @sample_c_d_o_2darray_V2(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %slice) {
	; VERDE-LABEL: sample_c_d_o_2darray_V2:			; VERDE-LABEL: sample_c_d_o_2darray_V2:
	; VERDE: ; %bb.0: ; %main_body			; VERDE: ; %bb.0: ; %main_body
	; VERDE-NEXT: image_sample_c_d_o v[0:1], v[0:15], s[0:7], s[8:11] dmask:0x6 da			; VERDE-NEXT: image_sample_c_d_o v[0:1], v[0:8], s[0:7], s[8:11] dmask:0x6 da
	; VERDE-NEXT: s_waitcnt vmcnt(0)			; VERDE-NEXT: s_waitcnt vmcnt(0)
	; VERDE-NEXT: ; return to shader part epilog			; VERDE-NEXT: ; return to shader part epilog
	;			;
	; GFX6789-LABEL: sample_c_d_o_2darray_V2:			; GFX6789-LABEL: sample_c_d_o_2darray_V2:
	; GFX6789: ; %bb.0: ; %main_body			; GFX6789: ; %bb.0: ; %main_body
	; GFX6789-NEXT: image_sample_c_d_o v[0:1], v[0:15], s[0:7], s[8:11] dmask:0x6 da			; GFX6789-NEXT: image_sample_c_d_o v[0:1], v[0:8], s[0:7], s[8:11] dmask:0x6 da
	; GFX6789-NEXT: s_waitcnt vmcnt(0)			; GFX6789-NEXT: s_waitcnt vmcnt(0)
	; GFX6789-NEXT: ; return to shader part epilog			; GFX6789-NEXT: ; return to shader part epilog
	;			;
	; GFX10PLUS-LABEL: sample_c_d_o_2darray_V2:			; GFX10PLUS-LABEL: sample_c_d_o_2darray_V2:
	; GFX10PLUS: ; %bb.0: ; %main_body			; GFX10PLUS: ; %bb.0: ; %main_body
	; GFX10PLUS-NEXT: image_sample_c_d_o v[0:1], v[0:15], s[0:7], s[8:11] dmask:0x6 dim:SQ_RSRC_IMG_2D_ARRAY			; GFX10PLUS-NEXT: image_sample_c_d_o v[0:1], v[0:8], s[0:7], s[8:11] dmask:0x6 dim:SQ_RSRC_IMG_2D_ARRAY
	; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)			; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
	; GFX10PLUS-NEXT: ; return to shader part epilog			; GFX10PLUS-NEXT: ; return to shader part epilog
	main_body:			main_body:
	%v = call <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f32.f32(i32 6, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)			%v = call <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f32.f32(i32 6, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
	ret <2 x float> %v			ret <2 x float> %v
	}			}

	define amdgpu_ps <4 x float> @sample_c_d_o_2darray_V2_tfe(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %slice) {			define amdgpu_ps <4 x float> @sample_c_d_o_2darray_V2_tfe(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %slice) {
	; VERDE-LABEL: sample_c_d_o_2darray_V2_tfe:			; VERDE-LABEL: sample_c_d_o_2darray_V2_tfe:
	; VERDE: ; %bb.0: ; %main_body			; VERDE: ; %bb.0: ; %main_body
	; VERDE-NEXT: v_mov_b32_e32 v9, 0			; VERDE-NEXT: v_mov_b32_e32 v9, 0
	; VERDE-NEXT: v_mov_b32_e32 v10, v9			; VERDE-NEXT: v_mov_b32_e32 v10, v9
	; VERDE-NEXT: v_mov_b32_e32 v11, v9			; VERDE-NEXT: v_mov_b32_e32 v11, v9
	; VERDE-NEXT: image_sample_c_d_o v[9:11], v[0:15], s[0:7], s[8:11] dmask:0x6 tfe da			; VERDE-NEXT: image_sample_c_d_o v[9:11], v[0:8], s[0:7], s[8:11] dmask:0x6 tfe da
	; VERDE-NEXT: s_waitcnt vmcnt(0)			; VERDE-NEXT: s_waitcnt vmcnt(0)
	; VERDE-NEXT: v_mov_b32_e32 v0, v9			; VERDE-NEXT: v_mov_b32_e32 v0, v9
	; VERDE-NEXT: v_mov_b32_e32 v1, v10			; VERDE-NEXT: v_mov_b32_e32 v1, v10
	; VERDE-NEXT: v_mov_b32_e32 v2, v11			; VERDE-NEXT: v_mov_b32_e32 v2, v11
	; VERDE-NEXT: ; return to shader part epilog			; VERDE-NEXT: ; return to shader part epilog
	;			;
	; GFX6789-LABEL: sample_c_d_o_2darray_V2_tfe:			; GFX6789-LABEL: sample_c_d_o_2darray_V2_tfe:
	; GFX6789: ; %bb.0: ; %main_body			; GFX6789: ; %bb.0: ; %main_body
	; GFX6789-NEXT: v_mov_b32_e32 v9, 0			; GFX6789-NEXT: v_mov_b32_e32 v9, 0
	; GFX6789-NEXT: v_mov_b32_e32 v10, v9			; GFX6789-NEXT: v_mov_b32_e32 v10, v9
	; GFX6789-NEXT: v_mov_b32_e32 v11, v9			; GFX6789-NEXT: v_mov_b32_e32 v11, v9
	; GFX6789-NEXT: image_sample_c_d_o v[9:11], v[0:15], s[0:7], s[8:11] dmask:0x6 tfe da			; GFX6789-NEXT: image_sample_c_d_o v[9:11], v[0:8], s[0:7], s[8:11] dmask:0x6 tfe da
	; GFX6789-NEXT: s_waitcnt vmcnt(0)			; GFX6789-NEXT: s_waitcnt vmcnt(0)
	; GFX6789-NEXT: v_mov_b32_e32 v0, v9			; GFX6789-NEXT: v_mov_b32_e32 v0, v9
	; GFX6789-NEXT: v_mov_b32_e32 v1, v10			; GFX6789-NEXT: v_mov_b32_e32 v1, v10
	; GFX6789-NEXT: v_mov_b32_e32 v2, v11			; GFX6789-NEXT: v_mov_b32_e32 v2, v11
	; GFX6789-NEXT: ; return to shader part epilog			; GFX6789-NEXT: ; return to shader part epilog
	;			;
	; GFX10-LABEL: sample_c_d_o_2darray_V2_tfe:			; GFX10-LABEL: sample_c_d_o_2darray_V2_tfe:
	; GFX10: ; %bb.0: ; %main_body			; GFX10: ; %bb.0: ; %main_body
	; GFX10-NEXT: v_mov_b32_e32 v9, 0			; GFX10-NEXT: v_mov_b32_e32 v9, 0
	; GFX10-NEXT: v_mov_b32_e32 v10, v9			; GFX10-NEXT: v_mov_b32_e32 v10, v9
	; GFX10-NEXT: v_mov_b32_e32 v11, v9			; GFX10-NEXT: v_mov_b32_e32 v11, v9
	; GFX10-NEXT: image_sample_c_d_o v[9:11], v[0:15], s[0:7], s[8:11] dmask:0x6 dim:SQ_RSRC_IMG_2D_ARRAY tfe			; GFX10-NEXT: image_sample_c_d_o v[9:11], v[0:8], s[0:7], s[8:11] dmask:0x6 dim:SQ_RSRC_IMG_2D_ARRAY tfe
	; GFX10-NEXT: s_waitcnt vmcnt(0)			; GFX10-NEXT: s_waitcnt vmcnt(0)
	; GFX10-NEXT: v_mov_b32_e32 v0, v9			; GFX10-NEXT: v_mov_b32_e32 v0, v9
	; GFX10-NEXT: v_mov_b32_e32 v1, v10			; GFX10-NEXT: v_mov_b32_e32 v1, v10
	; GFX10-NEXT: v_mov_b32_e32 v2, v11			; GFX10-NEXT: v_mov_b32_e32 v2, v11
	; GFX10-NEXT: ; return to shader part epilog			; GFX10-NEXT: ; return to shader part epilog
	;			;
	; GFX11-LABEL: sample_c_d_o_2darray_V2_tfe:			; GFX11-LABEL: sample_c_d_o_2darray_V2_tfe:
	; GFX11: ; %bb.0: ; %main_body			; GFX11: ; %bb.0: ; %main_body
	; GFX11-NEXT: v_mov_b32_e32 v9, 0			; GFX11-NEXT: v_mov_b32_e32 v9, 0
	; GFX11-NEXT: v_mov_b32_e32 v10, v9			; GFX11-NEXT: v_mov_b32_e32 v10, v9
	; GFX11-NEXT: v_mov_b32_e32 v11, v9			; GFX11-NEXT: v_mov_b32_e32 v11, v9
	; GFX11-NEXT: image_sample_c_d_o v[9:11], v[0:15], s[0:7], s[8:11] dmask:0x6 dim:SQ_RSRC_IMG_2D_ARRAY tfe			; GFX11-NEXT: image_sample_c_d_o v[9:11], v[0:8], s[0:7], s[8:11] dmask:0x6 dim:SQ_RSRC_IMG_2D_ARRAY tfe
	; GFX11-NEXT: s_waitcnt vmcnt(0)			; GFX11-NEXT: s_waitcnt vmcnt(0)
	; GFX11-NEXT: v_mov_b32_e32 v2, v11			; GFX11-NEXT: v_mov_b32_e32 v2, v11
	; GFX11-NEXT: v_dual_mov_b32 v0, v9 :: v_dual_mov_b32 v1, v10			; GFX11-NEXT: v_dual_mov_b32 v0, v9 :: v_dual_mov_b32 v1, v10
	; GFX11-NEXT: ; return to shader part epilog			; GFX11-NEXT: ; return to shader part epilog
	main_body:			main_body:
	%v = call {<2 x float>, i32} @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32i32.f32.f32(i32 6, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 1, i32 0)			%v = call {<2 x float>, i32} @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32i32.f32.f32(i32 6, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 1, i32 0)
	%v.vec = extractvalue {<2 x float>, i32} %v, 0			%v.vec = extractvalue {<2 x float>, i32} %v, 0
	%v.f1 = extractelement <2 x float> %v.vec, i32 0			%v.f1 = extractelement <2 x float> %v.vec, i32 0
	▲ Show 20 Lines • Show All 502 Lines • Show Last 20 Lines

llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.o.dim.ll

	Show First 20 Lines • Show All 180 Lines • ▼ Show 20 Lines
	; GCN: image_sample_c_d_cl_o v[0:3], v[0:5], s[0:7], s[8:11] dmask:0xf{{$}}			; GCN: image_sample_c_d_cl_o v[0:3], v[0:5], s[0:7], s[8:11] dmask:0xf{{$}}
	define amdgpu_ps <4 x float> @sample_c_d_cl_o_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %dsdh, float %dsdv, float %s, float %clamp) {			define amdgpu_ps <4 x float> @sample_c_d_cl_o_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %dsdh, float %dsdv, float %s, float %clamp) {
	main_body:			main_body:
	%v = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.o.1d.v4f32.f32.f32(i32 15, i32 %offset, float %zcompare, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)			%v = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.o.1d.v4f32.f32.f32(i32 15, i32 %offset, float %zcompare, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
	ret <4 x float> %v			ret <4 x float> %v
	}			}

	; GCN-LABEL: {{^}}sample_c_d_cl_o_2d:			; GCN-LABEL: {{^}}sample_c_d_cl_o_2d:
	; GCN: image_sample_c_d_cl_o v[0:3], v[0:15], s[0:7], s[8:11] dmask:0xf{{$}}			; GCN: image_sample_c_d_cl_o v[0:3], v[0:8], s[0:7], s[8:11] dmask:0xf{{$}}
	define amdgpu_ps <4 x float> @sample_c_d_cl_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %clamp) {			define amdgpu_ps <4 x float> @sample_c_d_cl_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %clamp) {
	main_body:			main_body:
	%v = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.o.2d.v4f32.f32.f32(i32 15, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)			%v = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.o.2d.v4f32.f32.f32(i32 15, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
	ret <4 x float> %v			ret <4 x float> %v
	}			}

	; GCN-LABEL: {{^}}sample_cd_o_1d:			; GCN-LABEL: {{^}}sample_cd_o_1d:
	; GCN: image_sample_cd_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}}			; GCN: image_sample_cd_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}}
	▲ Show 20 Lines • Show All 47 Lines • ▼ Show 20 Lines
	; GCN: image_sample_c_cd_cl_o v[0:3], v[0:5], s[0:7], s[8:11] dmask:0xf{{$}}			; GCN: image_sample_c_cd_cl_o v[0:3], v[0:5], s[0:7], s[8:11] dmask:0xf{{$}}
	define amdgpu_ps <4 x float> @sample_c_cd_cl_o_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %dsdh, float %dsdv, float %s, float %clamp) {			define amdgpu_ps <4 x float> @sample_c_cd_cl_o_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %dsdh, float %dsdv, float %s, float %clamp) {
	main_body:			main_body:
	%v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.o.1d.v4f32.f32.f32(i32 15, i32 %offset, float %zcompare, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)			%v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.o.1d.v4f32.f32.f32(i32 15, i32 %offset, float %zcompare, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
	ret <4 x float> %v			ret <4 x float> %v
	}			}

	; GCN-LABEL: {{^}}sample_c_cd_cl_o_2d:			; GCN-LABEL: {{^}}sample_c_cd_cl_o_2d:
	; GCN: image_sample_c_cd_cl_o v[0:3], v[0:15], s[0:7], s[8:11] dmask:0xf{{$}}			; GCN: image_sample_c_cd_cl_o v[0:3], v[0:8], s[0:7], s[8:11] dmask:0xf{{$}}
	define amdgpu_ps <4 x float> @sample_c_cd_cl_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %clamp) {			define amdgpu_ps <4 x float> @sample_c_cd_cl_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %clamp) {
	main_body:			main_body:
	%v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.o.2d.v4f32.f32.f32(i32 15, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)			%v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.o.2d.v4f32.f32.f32(i32 15, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
	ret <4 x float> %v			ret <4 x float> %v
	}			}

	; GCN-LABEL: {{^}}sample_l_o_1d:			; GCN-LABEL: {{^}}sample_l_o_1d:
	; GCN: image_sample_l_o v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf{{$}}			; GCN: image_sample_l_o v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf{{$}}
	▲ Show 20 Lines • Show All 110 Lines • Show Last 20 Lines

llvm/test/CodeGen/AMDGPU/llvm.amdgcn.intersect_ray.ll

Show All 14 Lines
declare <4 x i32> @llvm.amdgcn.image.bvh.intersect.ray.i64.v4f16(i64, float, <3 x float>, <3 x half>, <3 x half>, <4 x i32>)		declare <4 x i32> @llvm.amdgcn.image.bvh.intersect.ray.i64.v4f16(i64, float, <3 x float>, <3 x half>, <3 x half>, <4 x i32>)

; ERR: in function image_bvh_intersect_ray{{.*}}intrinsic not supported on subtarget		; ERR: in function image_bvh_intersect_ray{{.*}}intrinsic not supported on subtarget
; Arguments are flattened to represent the actual VGPR_A layout, so we have no		; Arguments are flattened to represent the actual VGPR_A layout, so we have no
; extra moves in the generated kernel.		; extra moves in the generated kernel.
define amdgpu_ps <4 x float> @image_bvh_intersect_ray(i32 %node_ptr, float %ray_extent, float %ray_origin_x, float %ray_origin_y, float %ray_origin_z, float %ray_dir_x, float %ray_dir_y, float %ray_dir_z, float %ray_inv_dir_x, float %ray_inv_dir_y, float %ray_inv_dir_z, <4 x i32> inreg %tdescr) {		define amdgpu_ps <4 x float> @image_bvh_intersect_ray(i32 %node_ptr, float %ray_extent, float %ray_origin_x, float %ray_origin_y, float %ray_origin_z, float %ray_dir_x, float %ray_dir_y, float %ray_dir_z, float %ray_inv_dir_x, float %ray_inv_dir_y, float %ray_inv_dir_z, <4 x i32> inreg %tdescr) {
; GCN-LABEL: image_bvh_intersect_ray:		; GCN-LABEL: image_bvh_intersect_ray:
; GCN: ; %bb.0: ; %main_body		; GCN: ; %bb.0: ; %main_body
; GCN-NEXT: image_bvh_intersect_ray v[0:3], v[0:15], s[0:3]		; GCN-NEXT: image_bvh_intersect_ray v[0:3], v[0:10], s[0:3]
; GCN-NEXT: s_waitcnt vmcnt(0)		; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: ; return to shader part epilog		; GCN-NEXT: ; return to shader part epilog
main_body:		main_body:
%ray_origin0 = insertelement <3 x float> undef, float %ray_origin_x, i32 0		%ray_origin0 = insertelement <3 x float> undef, float %ray_origin_x, i32 0
%ray_origin1 = insertelement <3 x float> %ray_origin0, float %ray_origin_y, i32 1		%ray_origin1 = insertelement <3 x float> %ray_origin0, float %ray_origin_y, i32 1
%ray_origin = insertelement <3 x float> %ray_origin1, float %ray_origin_z, i32 2		%ray_origin = insertelement <3 x float> %ray_origin1, float %ray_origin_z, i32 2
%ray_dir0 = insertelement <3 x float> undef, float %ray_dir_x, i32 0		%ray_dir0 = insertelement <3 x float> undef, float %ray_dir_x, i32 0
%ray_dir1 = insertelement <3 x float> %ray_dir0, float %ray_dir_y, i32 1		%ray_dir1 = insertelement <3 x float> %ray_dir0, float %ray_dir_y, i32 1
▲ Show 20 Lines • Show All 53 Lines • ▼ Show 20 Lines	main_body:
ret <4 x float> %r		ret <4 x float> %r
}		}

; Arguments are flattened to represent the actual VGPR_A layout, so we have no		; Arguments are flattened to represent the actual VGPR_A layout, so we have no
; extra moves in the generated kernel.		; extra moves in the generated kernel.
define amdgpu_ps <4 x float> @image_bvh64_intersect_ray(<2 x i32> %node_ptr_vec, float %ray_extent, float %ray_origin_x, float %ray_origin_y, float %ray_origin_z, float %ray_dir_x, float %ray_dir_y, float %ray_dir_z, float %ray_inv_dir_x, float %ray_inv_dir_y, float %ray_inv_dir_z, <4 x i32> inreg %tdescr) {		define amdgpu_ps <4 x float> @image_bvh64_intersect_ray(<2 x i32> %node_ptr_vec, float %ray_extent, float %ray_origin_x, float %ray_origin_y, float %ray_origin_z, float %ray_dir_x, float %ray_dir_y, float %ray_dir_z, float %ray_inv_dir_x, float %ray_inv_dir_y, float %ray_inv_dir_z, <4 x i32> inreg %tdescr) {
; GCN-LABEL: image_bvh64_intersect_ray:		; GCN-LABEL: image_bvh64_intersect_ray:
; GCN: ; %bb.0: ; %main_body		; GCN: ; %bb.0: ; %main_body
; GCN-NEXT: image_bvh64_intersect_ray v[0:3], v[0:15], s[0:3]		; GCN-NEXT: image_bvh64_intersect_ray v[0:3], v[0:11], s[0:3]
; GCN-NEXT: s_waitcnt vmcnt(0)		; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: ; return to shader part epilog		; GCN-NEXT: ; return to shader part epilog
main_body:		main_body:
%node_ptr = bitcast <2 x i32> %node_ptr_vec to i64		%node_ptr = bitcast <2 x i32> %node_ptr_vec to i64
%ray_origin0 = insertelement <3 x float> undef, float %ray_origin_x, i32 0		%ray_origin0 = insertelement <3 x float> undef, float %ray_origin_x, i32 0
%ray_origin1 = insertelement <3 x float> %ray_origin0, float %ray_origin_y, i32 1		%ray_origin1 = insertelement <3 x float> %ray_origin0, float %ray_origin_y, i32 1
%ray_origin = insertelement <3 x float> %ray_origin1, float %ray_origin_z, i32 2		%ray_origin = insertelement <3 x float> %ray_origin1, float %ray_origin_z, i32 2
%ray_dir0 = insertelement <3 x float> undef, float %ray_dir_x, i32 0		%ray_dir0 = insertelement <3 x float> undef, float %ray_dir_x, i32 0
Show All 21 Lines
; GFX10-NEXT: v_mov_b32_e32 v3, s3		; GFX10-NEXT: v_mov_b32_e32 v3, s3
; GFX10-NEXT: v_mov_b32_e32 v4, s4		; GFX10-NEXT: v_mov_b32_e32 v4, s4
; GFX10-NEXT: v_mov_b32_e32 v5, s5		; GFX10-NEXT: v_mov_b32_e32 v5, s5
; GFX10-NEXT: v_mov_b32_e32 v6, s6		; GFX10-NEXT: v_mov_b32_e32 v6, s6
; GFX10-NEXT: v_mov_b32_e32 v7, s7		; GFX10-NEXT: v_mov_b32_e32 v7, s7
; GFX10-NEXT: v_mov_b32_e32 v8, s8		; GFX10-NEXT: v_mov_b32_e32 v8, s8
; GFX10-NEXT: s_mov_b32 s15, s13		; GFX10-NEXT: s_mov_b32 s15, s13
; GFX10-NEXT: s_mov_b32 s13, s11		; GFX10-NEXT: s_mov_b32 s13, s11
; GFX10-NEXT: image_bvh64_intersect_ray v[0:3], v[0:15], s[12:15] a16		; GFX10-NEXT: image_bvh64_intersect_ray v[0:3], v[0:8], s[12:15] a16
; GFX10-NEXT: s_waitcnt vmcnt(0)		; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: ; return to shader part epilog		; GFX10-NEXT: ; return to shader part epilog
;		;
; GFX11-LABEL: image_bvh64_intersect_ray_a16:		; GFX11-LABEL: image_bvh64_intersect_ray_a16:
; GFX11: ; %bb.0: ; %main_body		; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: v_dual_mov_b32 v0, s3 :: v_dual_mov_b32 v1, s4		; GFX11-NEXT: v_dual_mov_b32 v0, s3 :: v_dual_mov_b32 v1, s4
; GFX11-NEXT: v_dual_mov_b32 v2, s5 :: v_dual_mov_b32 v7, s1		; GFX11-NEXT: v_dual_mov_b32 v2, s5 :: v_dual_mov_b32 v7, s1
; GFX11-NEXT: s_lshr_b32 s3, s6, 16		; GFX11-NEXT: s_lshr_b32 s3, s6, 16
Show All 37 Lines
; GFX1013-NEXT: v_add_co_ci_u32_e64 v5, s0, s3, 0, s0		; GFX1013-NEXT: v_add_co_ci_u32_e64 v5, s0, s3, 0, s0
; GFX1013-NEXT: flat_load_dword v0, v[2:3]		; GFX1013-NEXT: flat_load_dword v0, v[2:3]
; GFX1013-NEXT: flat_load_dword v1, v[4:5]		; GFX1013-NEXT: flat_load_dword v1, v[4:5]
; GFX1013-NEXT: v_mov_b32_e32 v2, 0		; GFX1013-NEXT: v_mov_b32_e32 v2, 0
; GFX1013-NEXT: v_mov_b32_e32 v3, 1.0		; GFX1013-NEXT: v_mov_b32_e32 v3, 1.0
; GFX1013-NEXT: v_mov_b32_e32 v4, 2.0		; GFX1013-NEXT: v_mov_b32_e32 v4, 2.0
; GFX1013-NEXT: v_mov_b32_e32 v5, 0x40400000		; GFX1013-NEXT: v_mov_b32_e32 v5, 0x40400000
; GFX1013-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)		; GFX1013-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX1013-NEXT: image_bvh_intersect_ray v[0:3], v[0:15], s[4:7]		; GFX1013-NEXT: image_bvh_intersect_ray v[0:3], v[0:10], s[4:7]
; GFX1013-NEXT: s_waitcnt vmcnt(0)		; GFX1013-NEXT: s_waitcnt vmcnt(0)
; GFX1013-NEXT: flat_store_dwordx4 v[0:1], v[0:3]		; GFX1013-NEXT: flat_store_dwordx4 v[0:1], v[0:3]
; GFX1013-NEXT: s_endpgm		; GFX1013-NEXT: s_endpgm
;		;
; GFX1030-LABEL: image_bvh_intersect_ray_nsa_reassign:		; GFX1030-LABEL: image_bvh_intersect_ray_nsa_reassign:
; GFX1030: ; %bb.0: ; %main_body		; GFX1030: ; %bb.0: ; %main_body
; GFX1030-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x24		; GFX1030-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x24
; GFX1030-NEXT: v_lshlrev_b32_e32 v2, 2, v0		; GFX1030-NEXT: v_lshlrev_b32_e32 v2, 2, v0
Show All 9 Lines
; GFX1030-NEXT: v_add_co_ci_u32_e64 v1, null, s1, 0, s0		; GFX1030-NEXT: v_add_co_ci_u32_e64 v1, null, s1, 0, s0
; GFX1030-NEXT: v_add_co_u32 v2, s0, s2, v2		; GFX1030-NEXT: v_add_co_u32 v2, s0, s2, v2
; GFX1030-NEXT: v_add_co_ci_u32_e64 v3, null, s3, 0, s0		; GFX1030-NEXT: v_add_co_ci_u32_e64 v3, null, s3, 0, s0
; GFX1030-NEXT: flat_load_dword v0, v[0:1]		; GFX1030-NEXT: flat_load_dword v0, v[0:1]
; GFX1030-NEXT: flat_load_dword v1, v[2:3]		; GFX1030-NEXT: flat_load_dword v1, v[2:3]
; GFX1030-NEXT: v_mov_b32_e32 v2, 0		; GFX1030-NEXT: v_mov_b32_e32 v2, 0
; GFX1030-NEXT: v_mov_b32_e32 v3, 1.0		; GFX1030-NEXT: v_mov_b32_e32 v3, 1.0
; GFX1030-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)		; GFX1030-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX1030-NEXT: image_bvh_intersect_ray v[0:3], v[0:15], s[4:7]		; GFX1030-NEXT: image_bvh_intersect_ray v[0:3], v[0:10], s[4:7]
; GFX1030-NEXT: s_waitcnt vmcnt(0)		; GFX1030-NEXT: s_waitcnt vmcnt(0)
; GFX1030-NEXT: flat_store_dwordx4 v[0:1], v[0:3]		; GFX1030-NEXT: flat_store_dwordx4 v[0:1], v[0:3]
; GFX1030-NEXT: s_endpgm		; GFX1030-NEXT: s_endpgm
;		;
; GFX11-LABEL: image_bvh_intersect_ray_nsa_reassign:		; GFX11-LABEL: image_bvh_intersect_ray_nsa_reassign:
; GFX11: ; %bb.0: ; %main_body		; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: s_load_b256 s[0:7], s[0:1], 0x24		; GFX11-NEXT: s_load_b256 s[0:7], s[0:1], 0x24
; GFX11-NEXT: v_dual_mov_b32 v5, 0x40a00000 :: v_dual_lshlrev_b32 v2, 2, v0		; GFX11-NEXT: v_dual_mov_b32 v5, 0x40a00000 :: v_dual_lshlrev_b32 v2, 2, v0
▲ Show 20 Lines • Show All 145 Lines • ▼ Show 20 Lines
; GFX1013-NEXT: v_mov_b32_e32 v11, 0x41000000		; GFX1013-NEXT: v_mov_b32_e32 v11, 0x41000000
; GFX1013-NEXT: s_waitcnt lgkmcnt(0)		; GFX1013-NEXT: s_waitcnt lgkmcnt(0)
; GFX1013-NEXT: v_add_co_u32 v0, s4, s4, v0		; GFX1013-NEXT: v_add_co_u32 v0, s4, s4, v0
; GFX1013-NEXT: v_add_co_ci_u32_e64 v1, s4, s5, 0, s4		; GFX1013-NEXT: v_add_co_ci_u32_e64 v1, s4, s5, 0, s4
; GFX1013-NEXT: flat_load_dword v2, v[0:1]		; GFX1013-NEXT: flat_load_dword v2, v[0:1]
; GFX1013-NEXT: v_mov_b32_e32 v0, 0xb36211c7		; GFX1013-NEXT: v_mov_b32_e32 v0, 0xb36211c7
; GFX1013-NEXT: v_mov_b32_e32 v1, 0x102		; GFX1013-NEXT: v_mov_b32_e32 v1, 0x102
; GFX1013-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)		; GFX1013-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX1013-NEXT: image_bvh64_intersect_ray v[0:3], v[0:15], s[0:3]		; GFX1013-NEXT: image_bvh64_intersect_ray v[0:3], v[0:11], s[0:3]
; GFX1013-NEXT: s_waitcnt vmcnt(0)		; GFX1013-NEXT: s_waitcnt vmcnt(0)
; GFX1013-NEXT: flat_store_dwordx4 v[0:1], v[0:3]		; GFX1013-NEXT: flat_store_dwordx4 v[0:1], v[0:3]
; GFX1013-NEXT: s_endpgm		; GFX1013-NEXT: s_endpgm
;		;
; GFX1030-LABEL: image_bvh64_intersect_ray_nsa_reassign:		; GFX1030-LABEL: image_bvh64_intersect_ray_nsa_reassign:
; GFX1030: ; %bb.0: ; %main_body		; GFX1030: ; %bb.0: ; %main_body
; GFX1030-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x24		; GFX1030-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x24
; GFX1030-NEXT: v_lshlrev_b32_e32 v0, 2, v0		; GFX1030-NEXT: v_lshlrev_b32_e32 v0, 2, v0
Show All 9 Lines
; GFX1030-NEXT: v_mov_b32_e32 v4, 1.0		; GFX1030-NEXT: v_mov_b32_e32 v4, 1.0
; GFX1030-NEXT: s_waitcnt lgkmcnt(0)		; GFX1030-NEXT: s_waitcnt lgkmcnt(0)
; GFX1030-NEXT: v_add_co_u32 v0, s4, s4, v0		; GFX1030-NEXT: v_add_co_u32 v0, s4, s4, v0
; GFX1030-NEXT: v_add_co_ci_u32_e64 v1, null, s5, 0, s4		; GFX1030-NEXT: v_add_co_ci_u32_e64 v1, null, s5, 0, s4
; GFX1030-NEXT: flat_load_dword v2, v[0:1]		; GFX1030-NEXT: flat_load_dword v2, v[0:1]
; GFX1030-NEXT: v_mov_b32_e32 v1, 0x102		; GFX1030-NEXT: v_mov_b32_e32 v1, 0x102
; GFX1030-NEXT: v_mov_b32_e32 v0, 0xb36211c7		; GFX1030-NEXT: v_mov_b32_e32 v0, 0xb36211c7
; GFX1030-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)		; GFX1030-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX1030-NEXT: image_bvh64_intersect_ray v[0:3], v[0:15], s[0:3]		; GFX1030-NEXT: image_bvh64_intersect_ray v[0:3], v[0:11], s[0:3]
; GFX1030-NEXT: s_waitcnt vmcnt(0)		; GFX1030-NEXT: s_waitcnt vmcnt(0)
; GFX1030-NEXT: flat_store_dwordx4 v[0:1], v[0:3]		; GFX1030-NEXT: flat_store_dwordx4 v[0:1], v[0:3]
; GFX1030-NEXT: s_endpgm		; GFX1030-NEXT: s_endpgm
;		;
; GFX11-LABEL: image_bvh64_intersect_ray_nsa_reassign:		; GFX11-LABEL: image_bvh64_intersect_ray_nsa_reassign:
; GFX11: ; %bb.0: ; %main_body		; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: s_load_b64 s[4:5], s[0:1], 0x24		; GFX11-NEXT: s_load_b64 s[4:5], s[0:1], 0x24
; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0		; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0
▲ Show 20 Lines • Show All 48 Lines • ▼ Show 20 Lines
; GFX1013-NEXT: v_mov_b32_e32 v8, 0x48004700		; GFX1013-NEXT: v_mov_b32_e32 v8, 0x48004700
; GFX1013-NEXT: s_waitcnt lgkmcnt(0)		; GFX1013-NEXT: s_waitcnt lgkmcnt(0)
; GFX1013-NEXT: v_add_co_u32 v0, s4, s4, v0		; GFX1013-NEXT: v_add_co_u32 v0, s4, s4, v0
; GFX1013-NEXT: v_add_co_ci_u32_e64 v1, s4, s5, 0, s4		; GFX1013-NEXT: v_add_co_ci_u32_e64 v1, s4, s5, 0, s4
; GFX1013-NEXT: flat_load_dword v2, v[0:1]		; GFX1013-NEXT: flat_load_dword v2, v[0:1]
; GFX1013-NEXT: v_mov_b32_e32 v0, 0xb36211c6		; GFX1013-NEXT: v_mov_b32_e32 v0, 0xb36211c6
; GFX1013-NEXT: v_mov_b32_e32 v1, 0x102		; GFX1013-NEXT: v_mov_b32_e32 v1, 0x102
; GFX1013-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)		; GFX1013-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX1013-NEXT: image_bvh64_intersect_ray v[0:3], v[0:15], s[0:3] a16		; GFX1013-NEXT: image_bvh64_intersect_ray v[0:3], v[0:8], s[0:3] a16
; GFX1013-NEXT: s_waitcnt vmcnt(0)		; GFX1013-NEXT: s_waitcnt vmcnt(0)
; GFX1013-NEXT: flat_store_dwordx4 v[0:1], v[0:3]		; GFX1013-NEXT: flat_store_dwordx4 v[0:1], v[0:3]
; GFX1013-NEXT: s_endpgm		; GFX1013-NEXT: s_endpgm
;		;
; GFX1030-LABEL: image_bvh64_intersect_ray_a16_nsa_reassign:		; GFX1030-LABEL: image_bvh64_intersect_ray_a16_nsa_reassign:
; GFX1030: ; %bb.0: ; %main_body		; GFX1030: ; %bb.0: ; %main_body
; GFX1030-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x24		; GFX1030-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x24
; GFX1030-NEXT: v_lshlrev_b32_e32 v0, 2, v0		; GFX1030-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GFX1030-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x34		; GFX1030-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x34
; GFX1030-NEXT: v_mov_b32_e32 v3, 0		; GFX1030-NEXT: v_mov_b32_e32 v3, 0
; GFX1030-NEXT: v_mov_b32_e32 v5, 2.0		; GFX1030-NEXT: v_mov_b32_e32 v5, 2.0
; GFX1030-NEXT: v_mov_b32_e32 v4, 1.0		; GFX1030-NEXT: v_mov_b32_e32 v4, 1.0
; GFX1030-NEXT: v_mov_b32_e32 v6, 0x44004200		; GFX1030-NEXT: v_mov_b32_e32 v6, 0x44004200
; GFX1030-NEXT: v_mov_b32_e32 v7, 0x46004500		; GFX1030-NEXT: v_mov_b32_e32 v7, 0x46004500
; GFX1030-NEXT: v_mov_b32_e32 v8, 0x48004700		; GFX1030-NEXT: v_mov_b32_e32 v8, 0x48004700
; GFX1030-NEXT: s_waitcnt lgkmcnt(0)		; GFX1030-NEXT: s_waitcnt lgkmcnt(0)
; GFX1030-NEXT: v_add_co_u32 v0, s4, s4, v0		; GFX1030-NEXT: v_add_co_u32 v0, s4, s4, v0
; GFX1030-NEXT: v_add_co_ci_u32_e64 v1, null, s5, 0, s4		; GFX1030-NEXT: v_add_co_ci_u32_e64 v1, null, s5, 0, s4
; GFX1030-NEXT: flat_load_dword v2, v[0:1]		; GFX1030-NEXT: flat_load_dword v2, v[0:1]
; GFX1030-NEXT: v_mov_b32_e32 v1, 0x102		; GFX1030-NEXT: v_mov_b32_e32 v1, 0x102
; GFX1030-NEXT: v_mov_b32_e32 v0, 0xb36211c6		; GFX1030-NEXT: v_mov_b32_e32 v0, 0xb36211c6
; GFX1030-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)		; GFX1030-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX1030-NEXT: image_bvh64_intersect_ray v[0:3], v[0:15], s[0:3] a16		; GFX1030-NEXT: image_bvh64_intersect_ray v[0:3], v[0:8], s[0:3] a16
; GFX1030-NEXT: s_waitcnt vmcnt(0)		; GFX1030-NEXT: s_waitcnt vmcnt(0)
; GFX1030-NEXT: flat_store_dwordx4 v[0:1], v[0:3]		; GFX1030-NEXT: flat_store_dwordx4 v[0:1], v[0:3]
; GFX1030-NEXT: s_endpgm		; GFX1030-NEXT: s_endpgm
;		;
; GFX11-LABEL: image_bvh64_intersect_ray_a16_nsa_reassign:		; GFX11-LABEL: image_bvh64_intersect_ray_a16_nsa_reassign:
; GFX11: ; %bb.0: ; %main_body		; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: s_load_b64 s[4:5], s[0:1], 0x24		; GFX11-NEXT: s_load_b64 s[4:5], s[0:1], 0x24
; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0		; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0
Show All 36 Lines

llvm/test/CodeGen/AMDGPU/load-constant-i32.ll

	Show First 20 Lines • Show All 55 Lines • ▼ Show 20 Lines
	; EG: VTX_READ_128			; EG: VTX_READ_128
	define amdgpu_kernel void @constant_load_v8i32(<8 x i32> addrspace(1)* %out, <8 x i32> addrspace(4)* %in) #0 {			define amdgpu_kernel void @constant_load_v8i32(<8 x i32> addrspace(1)* %out, <8 x i32> addrspace(4)* %in) #0 {
	entry:			entry:
	%ld = load <8 x i32>, <8 x i32> addrspace(4)* %in			%ld = load <8 x i32>, <8 x i32> addrspace(4)* %in
	store <8 x i32> %ld, <8 x i32> addrspace(1)* %out			store <8 x i32> %ld, <8 x i32> addrspace(1)* %out
	ret void			ret void
	}			}

				; FUNC-LABEL: {{^}}constant_load_v9i32:
				; GCN: s_load_dword
				; GCN: s_load_dwordx8

				; EG: VTX_READ_128
				; EG: VTX_READ_128
				; EG: VTX_READ_32
				define amdgpu_kernel void @constant_load_v9i32(<9 x i32> addrspace(1)* %out, <9 x i32> addrspace(4)* %in) #0 {
				entry:
				%ld = load <9 x i32>, <9 x i32> addrspace(4)* %in
				store <9 x i32> %ld, <9 x i32> addrspace(1)* %out
				ret void
				}

				; FUNC-LABEL: {{^}}constant_load_v10i32:
				; GCN: s_load_dwordx2
				; GCN: s_load_dwordx8

				; EG: VTX_READ_128
				; EG: VTX_READ_128
				; EG: VTX_READ_128
				define amdgpu_kernel void @constant_load_v10i32(<10 x i32> addrspace(1)* %out, <10 x i32> addrspace(4)* %in) #0 {
				entry:
				%ld = load <10 x i32>, <10 x i32> addrspace(4)* %in
				store <10 x i32> %ld, <10 x i32> addrspace(1)* %out
				ret void
				}

				; FUNC-LABEL: {{^}}constant_load_v11i32:
				; GCN: s_load_dwordx4
				; GCN: s_load_dwordx8

				; EG: VTX_READ_128
				; EG: VTX_READ_128
				; EG: VTX_READ_128
				define amdgpu_kernel void @constant_load_v11i32(<11 x i32> addrspace(1)* %out, <11 x i32> addrspace(4)* %in) #0 {
				entry:
				%ld = load <11 x i32>, <11 x i32> addrspace(4)* %in
				store <11 x i32> %ld, <11 x i32> addrspace(1)* %out
				ret void
				}

				; FUNC-LABEL: {{^}}constant_load_v12i32:
				; GCN: s_load_dwordx4
				; GCN: s_load_dwordx8

				; EG: VTX_READ_128
				; EG: VTX_READ_128
				; EG: VTX_READ_128
				define amdgpu_kernel void @constant_load_v12i32(<12 x i32> addrspace(1)* %out, <12 x i32> addrspace(4)* %in) #0 {
				entry:
				%ld = load <12 x i32>, <12 x i32> addrspace(4)* %in
				store <12 x i32> %ld, <12 x i32> addrspace(1)* %out
				ret void
				}

	; FUNC-LABEL: {{^}}constant_load_v16i32:			; FUNC-LABEL: {{^}}constant_load_v16i32:
	; GCN: s_load_dwordx16			; GCN: s_load_dwordx16

	; EG: VTX_READ_128			; EG: VTX_READ_128
	; EG: VTX_READ_128			; EG: VTX_READ_128
	; EG: VTX_READ_128			; EG: VTX_READ_128
	; EG: VTX_READ_128			; EG: VTX_READ_128
	define amdgpu_kernel void @constant_load_v16i32(<16 x i32> addrspace(1)* %out, <16 x i32> addrspace(4)* %in) #0 {			define amdgpu_kernel void @constant_load_v16i32(<16 x i32> addrspace(1)* %out, <16 x i32> addrspace(4)* %in) #0 {
	▲ Show 20 Lines • Show All 362 Lines • Show Last 20 Lines

llvm/test/CodeGen/AMDGPU/load-global-f32.ll

	Show First 20 Lines • Show All 63 Lines • ▼ Show 20 Lines
	; R600: VTX_READ_128			; R600: VTX_READ_128
	define amdgpu_kernel void @global_load_v8f32(<8 x float> addrspace(1)* %out, <8 x float> addrspace(1)* %in) #0 {			define amdgpu_kernel void @global_load_v8f32(<8 x float> addrspace(1)* %out, <8 x float> addrspace(1)* %in) #0 {
	entry:			entry:
	%tmp0 = load <8 x float>, <8 x float> addrspace(1)* %in			%tmp0 = load <8 x float>, <8 x float> addrspace(1)* %in
	store <8 x float> %tmp0, <8 x float> addrspace(1)* %out			store <8 x float> %tmp0, <8 x float> addrspace(1)* %out
	ret void			ret void
	}			}

				; FUNC-LABEL: {{^}}global_load_v9f32:
				; GCN-NOHSA: buffer_load_dword
				; GCN-NOHSA: buffer_load_dwordx4
				; GCN-NOHSA: buffer_load_dwordx4
				; GCN-HSA: flat_load_dwordx4
				; GCN-HSA: flat_load_dword
				; GCN-HSA: flat_load_dwordx4

				; R600: VTX_READ_128
				; R600: VTX_READ_32
				; R600: VTX_READ_128
				define amdgpu_kernel void @global_load_v9f32(<9 x float> addrspace(1)* %out, <9 x float> addrspace(1)* %in) #0 {
				entry:
				%tmp0 = load <9 x float>, <9 x float> addrspace(1)* %in
				store <9 x float> %tmp0, <9 x float> addrspace(1)* %out
				ret void
				}


				; FUNC-LABEL: {{^}}global_load_v10f32:
				; GCN-NOHSA: buffer_load_dwordx4
				; GCN-NOHSA: buffer_load_dwordx4
				; GCN-NOHSA: buffer_load_dwordx2
				; GCN-HSA: flat_load_dwordx4
				; GCN-HSA: flat_load_dwordx4
				; GCN-HSA: flat_load_dwordx2

				; R600: VTX_READ_128
				; R600: VTX_READ_128
				; R600: VTX_READ_128
				define amdgpu_kernel void @global_load_v10f32(<10 x float> addrspace(1)* %out, <10 x float> addrspace(1)* %in) #0 {
				arsenmUnsubmitted Not Done Reply Inline Actions New tests should use opaque pointers arsenm: New tests should use opaque pointers
				entry:
				%tmp0 = load <10 x float>, <10 x float> addrspace(1)* %in
				store <10 x float> %tmp0, <10 x float> addrspace(1)* %out
				ret void
				}

				; FUNC-LABEL: {{^}}global_load_v11f32:
				; SI-NOHSA: buffer_load_dwordx4
				; SI-NOHSA: buffer_load_dwordx4
				; SI-NOHSA: buffer_load_dwordx4
				; GCNX3-NOHSA: buffer_load_dwordx4
				; GCNX3-NOHSA: buffer_load_dwordx4
				; GCNX3-NOHSA: buffer_load_dwordx3
				; GCN-HSA: flat_load_dwordx4
				; GCN-HSA: flat_load_dwordx4
				; GCN-HSA: flat_load_dwordx3

				; R600: VTX_READ_128
				; R600: VTX_READ_128
				; R600: VTX_READ_128
				define amdgpu_kernel void @global_load_v11f32(<11 x float> addrspace(1)* %out, <11 x float> addrspace(1)* %in) #0 {
				entry:
				%tmp0 = load <11 x float>, <11 x float> addrspace(1)* %in
				store <11 x float> %tmp0, <11 x float> addrspace(1)* %out
				ret void
				}

				; FUNC-LABEL: {{^}}global_load_v12f32:
				; GCN-NOHSA: buffer_load_dwordx4
				; GCN-NOHSA: buffer_load_dwordx4
				; GCN-NOHSA: buffer_load_dwordx4
				; GCN-HSA: flat_load_dwordx4
				; GCN-HSA: flat_load_dwordx4
				; GCN-HSA: flat_load_dwordx4

				; R600: VTX_READ_128
				; R600: VTX_READ_128
				; R600: VTX_READ_128
				define amdgpu_kernel void @global_load_v12f32(<12 x float> addrspace(1)* %out, <12 x float> addrspace(1)* %in) #0 {
				entry:
				%tmp0 = load <12 x float>, <12 x float> addrspace(1)* %in
				store <12 x float> %tmp0, <12 x float> addrspace(1)* %out
				ret void
				}

	; FUNC-LABEL: {{^}}global_load_v16f32:			; FUNC-LABEL: {{^}}global_load_v16f32:
	; GCN-NOHSA: buffer_load_dwordx4			; GCN-NOHSA: buffer_load_dwordx4
	; GCN-NOHSA: buffer_load_dwordx4			; GCN-NOHSA: buffer_load_dwordx4
	; GCN-NOHSA: buffer_load_dwordx4			; GCN-NOHSA: buffer_load_dwordx4
	; GCN-NOHSA: buffer_load_dwordx4			; GCN-NOHSA: buffer_load_dwordx4

	; GCN-HSA: flat_load_dwordx4			; GCN-HSA: flat_load_dwordx4
	; GCN-HSA: flat_load_dwordx4			; GCN-HSA: flat_load_dwordx4
	Show All 15 Lines

llvm/test/CodeGen/AMDGPU/load-global-i32.ll

	Show First 20 Lines • Show All 63 Lines • ▼ Show 20 Lines
	; EG: VTX_READ_128			; EG: VTX_READ_128
	define amdgpu_kernel void @global_load_v8i32(<8 x i32> addrspace(1)* %out, <8 x i32> addrspace(1)* %in) #0 {			define amdgpu_kernel void @global_load_v8i32(<8 x i32> addrspace(1)* %out, <8 x i32> addrspace(1)* %in) #0 {
	entry:			entry:
	%ld = load <8 x i32>, <8 x i32> addrspace(1)* %in			%ld = load <8 x i32>, <8 x i32> addrspace(1)* %in
	store <8 x i32> %ld, <8 x i32> addrspace(1)* %out			store <8 x i32> %ld, <8 x i32> addrspace(1)* %out
	ret void			ret void
	}			}

				; FUNC-LABEL: {{^}}global_load_v9i32:
				; GCN-NOHSA: buffer_load_dwordx4
				; GCN-NOHSA: buffer_load_dwordx4
				; GCN-NOHSA: buffer_load_dword
				; GCN-HSA: {{flat\|global}}_load_dwordx4
				; GCN-HSA: {{flat\|global}}_load_dwordx4
				; GCN-HSA: {{flat\|global}}_load_dword
				define amdgpu_kernel void @global_load_v9i32(<9 x i32> addrspace(1)* %out, <9 x i32> addrspace(1)* %in) #0 {
				entry:
				%ld = load <9 x i32>, <9 x i32> addrspace(1)* %in
				store <9 x i32> %ld, <9 x i32> addrspace(1)* %out
				ret void
				}

				; FUNC-LABEL: {{^}}global_load_v10i32:
				; GCN-NOHSA: buffer_load_dwordx4
				; GCN-NOHSA: buffer_load_dwordx4
				; GCN-NOHSA: buffer_load_dwordx2
				; GCN-HSA: {{flat\|global}}_load_dwordx4
				; GCN-HSA: {{flat\|global}}_load_dwordx4
				; GCN-HSA: {{flat\|global}}_load_dwordx2
				define amdgpu_kernel void @global_load_v10i32(<10 x i32> addrspace(1)* %out, <10 x i32> addrspace(1)* %in) #0 {
				entry:
				%ld = load <10 x i32>, <10 x i32> addrspace(1)* %in
				store <10 x i32> %ld, <10 x i32> addrspace(1)* %out
				ret void
				}

				; FUNC-LABEL: {{^}}global_load_v11i32:
				; SI-NOHSA: buffer_load_dwordx4
				; SI-NOHSA: buffer_load_dwordx4
				; SI-NOHSA: buffer_load_dwordx4
				; GCNX3-NOHSA: buffer_load_dwordx4
				; GCNX3-NOHSA: buffer_load_dwordx4
				; GCNX3-NOHSA: buffer_load_dwordx3
				; GCN-HSA: {{flat\|global}}_load_dwordx4
				; GCN-HSA: {{flat\|global}}_load_dwordx4
				; GCN-HSA: {{flat\|global}}_load_dwordx3
				define amdgpu_kernel void @global_load_v11i32(<11 x i32> addrspace(1)* %out, <11 x i32> addrspace(1)* %in) #0 {
				entry:
				%ld = load <11 x i32>, <11 x i32> addrspace(1)* %in
				store <11 x i32> %ld, <11 x i32> addrspace(1)* %out
				ret void
				}


				; FUNC-LABEL: {{^}}global_load_v12i32:
				; GCN-NOHSA: buffer_load_dwordx4
				; GCN-NOHSA: buffer_load_dwordx4
				; GCN-NOHSA: buffer_load_dwordx4
				; GCN-HSA: {{flat\|global}}_load_dwordx4
				; GCN-HSA: {{flat\|global}}_load_dwordx4
				; GCN-HSA: {{flat\|global}}_load_dwordx4
				define amdgpu_kernel void @global_load_v12i32(<12 x i32> addrspace(1)* %out, <12 x i32> addrspace(1)* %in) #0 {
				entry:
				%ld = load <12 x i32>, <12 x i32> addrspace(1)* %in
				store <12 x i32> %ld, <12 x i32> addrspace(1)* %out
				ret void
				}

	; FUNC-LABEL: {{^}}global_load_v16i32:			; FUNC-LABEL: {{^}}global_load_v16i32:
	; GCN-NOHSA: buffer_load_dwordx4			; GCN-NOHSA: buffer_load_dwordx4
	; GCN-NOHSA: buffer_load_dwordx4			; GCN-NOHSA: buffer_load_dwordx4
	; GCN-NOHSA: buffer_load_dwordx4			; GCN-NOHSA: buffer_load_dwordx4
	; GCN-NOHSA: buffer_load_dwordx4			; GCN-NOHSA: buffer_load_dwordx4

	; GCN-HSA: {{flat\|global}}_load_dwordx4			; GCN-HSA: {{flat\|global}}_load_dwordx4
	; GCN-HSA: {{flat\|global}}_load_dwordx4			; GCN-HSA: {{flat\|global}}_load_dwordx4
	▲ Show 20 Lines • Show All 511 Lines • Show Last 20 Lines

llvm/test/CodeGen/AMDGPU/merge-image-sample-gfx11.mir

Show First 20 Lines • Show All 708 Lines • ▼ Show 20 Lines	bb.0.entry:
%5:vreg_256 = IMPLICIT_DEF		%5:vreg_256 = IMPLICIT_DEF
%6:vgpr_32 = IMAGE_SAMPLE_C_D_CL_V1_V8_gfx11 %5:vreg_256, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)		%6:vgpr_32 = IMAGE_SAMPLE_C_D_CL_V1_V8_gfx11 %5:vreg_256, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
%7:vreg_96 = IMAGE_SAMPLE_C_D_CL_V3_V8_gfx11 %5:vreg_256, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)		%7:vreg_96 = IMAGE_SAMPLE_C_D_CL_V3_V8_gfx11 %5:vreg_256, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
...		...
---		---


# GFX11-LABEL: name: image_sample_c_d_cl_o_merged_v1v3		# GFX11-LABEL: name: image_sample_c_d_cl_o_merged_v1v3
# GFX11: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_D_CL_O_V4_V16_gfx11 %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4)		# GFX11: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_D_CL_O_V4_V9_gfx11 %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4)
# GFX11: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0		# GFX11: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0
# GFX11: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3		# GFX11: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3

name: image_sample_c_d_cl_o_merged_v1v3		name: image_sample_c_d_cl_o_merged_v1v3
body: \|		body: \|
bb.0.entry:		bb.0.entry:
%0:sgpr_64 = COPY $sgpr0_sgpr1		%0:sgpr_64 = COPY $sgpr0_sgpr1
%1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0		%1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99		%2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
%3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0		%3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0
%4:vgpr_32 = COPY %2.sub3		%4:vgpr_32 = COPY %2.sub3
%5:vreg_512 = IMPLICIT_DEF		%5:vreg_288 = IMPLICIT_DEF
%6:vgpr_32 = IMAGE_SAMPLE_C_D_CL_O_V1_V16_gfx11 %5:vreg_512, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)		%6:vgpr_32 = IMAGE_SAMPLE_C_D_CL_O_V1_V9_gfx11 %5:vreg_288, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
%7:vreg_96 = IMAGE_SAMPLE_C_D_CL_O_V3_V16_gfx11 %5:vreg_512, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)		%7:vreg_96 = IMAGE_SAMPLE_C_D_CL_O_V3_V9_gfx11 %5:vreg_288, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
...		...
---		---


# GFX11-LABEL: name: image_sample_c_d_o_merged_v1v3		# GFX11-LABEL: name: image_sample_c_d_o_merged_v1v3
# GFX11: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_D_O_V4_V8_gfx11 %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4)		# GFX11: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_D_O_V4_V8_gfx11 %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4)
# GFX11: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0		# GFX11: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0
# GFX11: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3		# GFX11: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3
▲ Show 20 Lines • Show All 274 Lines • Show Last 20 Lines

llvm/test/CodeGen/AMDGPU/select.f16.ll

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py		; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -amdgpu-scalarize-global-loads=false -mtriple=amdgcn-- -mcpu=tahiti -verify-machineinstrs \| FileCheck %s --check-prefix=SI		; RUN: llc < %s -amdgpu-scalarize-global-loads=false -mtriple=amdgcn-- -mcpu=tahiti -verify-machineinstrs \| FileCheck %s --check-prefix=SI
; RUN: llc < %s -amdgpu-scalarize-global-loads=false -mtriple=amdgcn-- -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs \| FileCheck %s --check-prefix=VI		; RUN: llc < %s -amdgpu-scalarize-global-loads=false -mtriple=amdgcn-- -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs \| FileCheck %s --check-prefix=VI

define amdgpu_kernel void @select_f16(		define amdgpu_kernel void @select_f16(
; SI-LABEL: select_f16:		; SI-LABEL: select_f16:
; SI: ; %bb.0: ; %entry		; SI: ; %bb.0: ; %entry
		; SI-NEXT: s_mov_b32 s24, SCRATCH_RSRC_DWORD0
; SI-NEXT: s_load_dwordx8 s[4:11], s[0:1], 0x9		; SI-NEXT: s_load_dwordx8 s[4:11], s[0:1], 0x9
		; SI-NEXT: s_mov_b32 s25, SCRATCH_RSRC_DWORD1
		; SI-NEXT: s_mov_b32 s26, -1
		; SI-NEXT: s_mov_b32 s27, 0xe8f000
		; SI-NEXT: s_add_u32 s24, s24, s3
; SI-NEXT: s_mov_b32 s3, 0xf000		; SI-NEXT: s_mov_b32 s3, 0xf000
; SI-NEXT: s_mov_b32 s2, -1		; SI-NEXT: s_mov_b32 s2, -1
; SI-NEXT: s_load_dwordx2 s[12:13], s[0:1], 0x11		; SI-NEXT: s_load_dwordx2 s[12:13], s[0:1], 0x11
; SI-NEXT: s_mov_b32 s18, s2
; SI-NEXT: s_waitcnt lgkmcnt(0)		; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: s_mov_b32 s16, s6		; SI-NEXT: s_mov_b32 s16, s6
; SI-NEXT: s_mov_b32 s17, s7		; SI-NEXT: s_mov_b32 s17, s7
		; SI-NEXT: s_mov_b32 s18, s2
; SI-NEXT: s_mov_b32 s19, s3		; SI-NEXT: s_mov_b32 s19, s3
; SI-NEXT: s_mov_b32 s20, s8		; SI-NEXT: s_mov_b32 s20, s8
; SI-NEXT: s_mov_b32 s21, s9		; SI-NEXT: s_mov_b32 s21, s9
; SI-NEXT: s_mov_b32 s22, s2		; SI-NEXT: s_mov_b32 s22, s2
; SI-NEXT: s_mov_b32 s23, s3		; SI-NEXT: s_mov_b32 s23, s3
; SI-NEXT: s_mov_b32 s8, s10		; SI-NEXT: s_mov_b32 s8, s10
; SI-NEXT: s_mov_b32 s9, s11		; SI-NEXT: s_mov_b32 s9, s11
; SI-NEXT: s_mov_b32 s10, s2		; SI-NEXT: s_mov_b32 s10, s2
; SI-NEXT: s_mov_b32 s11, s3		; SI-NEXT: s_mov_b32 s11, s3
; SI-NEXT: s_mov_b32 s14, s2		; SI-NEXT: s_mov_b32 s14, s2
; SI-NEXT: s_mov_b32 s15, s3		; SI-NEXT: s_mov_b32 s15, s3
; SI-NEXT: buffer_load_ushort v0, off, s[16:19], 0 glc		; SI-NEXT: buffer_load_ushort v0, off, s[16:19], 0 glc
; SI-NEXT: s_waitcnt vmcnt(0)		; SI-NEXT: s_waitcnt vmcnt(0)
; SI-NEXT: buffer_load_ushort v1, off, s[20:23], 0 glc		; SI-NEXT: buffer_load_ushort v1, off, s[20:23], 0 glc
; SI-NEXT: s_waitcnt vmcnt(0)		; SI-NEXT: s_waitcnt vmcnt(0)
; SI-NEXT: buffer_load_ushort v2, off, s[8:11], 0 glc		; SI-NEXT: buffer_load_ushort v2, off, s[8:11], 0 glc
; SI-NEXT: s_waitcnt vmcnt(0)		; SI-NEXT: s_waitcnt vmcnt(0)
; SI-NEXT: buffer_load_ushort v3, off, s[12:15], 0 glc		; SI-NEXT: buffer_load_ushort v3, off, s[12:15], 0 glc
; SI-NEXT: s_waitcnt vmcnt(0)		; SI-NEXT: s_waitcnt vmcnt(0)
; SI-NEXT: s_mov_b32 s0, s4		; SI-NEXT: s_mov_b32 s0, s4
; SI-NEXT: s_mov_b32 s1, s5		; SI-NEXT: s_mov_b32 s1, s5
		; SI-NEXT: s_addc_u32 s25, s25, 0
; SI-NEXT: v_cvt_f32_f16_e32 v0, v0		; SI-NEXT: v_cvt_f32_f16_e32 v0, v0
; SI-NEXT: v_cvt_f32_f16_e32 v1, v1		; SI-NEXT: v_cvt_f32_f16_e32 v1, v1
; SI-NEXT: v_cvt_f32_f16_e32 v2, v2		; SI-NEXT: v_cvt_f32_f16_e32 v2, v2
; SI-NEXT: v_cvt_f32_f16_e32 v3, v3		; SI-NEXT: v_cvt_f32_f16_e32 v3, v3
; SI-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1		; SI-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
; SI-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc		; SI-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
; SI-NEXT: v_cvt_f16_f32_e32 v0, v0		; SI-NEXT: v_cvt_f16_f32_e32 v0, v0
; SI-NEXT: buffer_store_short v0, off, s[0:3], 0		; SI-NEXT: buffer_store_short v0, off, s[0:3], 0
; SI-NEXT: s_endpgm		; SI-NEXT: s_endpgm
;		;
; VI-LABEL: select_f16:		; VI-LABEL: select_f16:
; VI: ; %bb.0: ; %entry		; VI: ; %bb.0: ; %entry
		; VI-NEXT: s_mov_b32 s24, SCRATCH_RSRC_DWORD0
; VI-NEXT: s_load_dwordx8 s[4:11], s[0:1], 0x24		; VI-NEXT: s_load_dwordx8 s[4:11], s[0:1], 0x24
		; VI-NEXT: s_mov_b32 s25, SCRATCH_RSRC_DWORD1
		; VI-NEXT: s_mov_b32 s26, -1
		; VI-NEXT: s_mov_b32 s27, 0xe80000
		; VI-NEXT: s_add_u32 s24, s24, s3
; VI-NEXT: s_mov_b32 s3, 0xf000		; VI-NEXT: s_mov_b32 s3, 0xf000
; VI-NEXT: s_mov_b32 s2, -1		; VI-NEXT: s_mov_b32 s2, -1
; VI-NEXT: s_load_dwordx2 s[12:13], s[0:1], 0x44		; VI-NEXT: s_load_dwordx2 s[12:13], s[0:1], 0x44
; VI-NEXT: s_mov_b32 s18, s2
; VI-NEXT: s_waitcnt lgkmcnt(0)		; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: s_mov_b32 s16, s6		; VI-NEXT: s_mov_b32 s16, s6
; VI-NEXT: s_mov_b32 s17, s7		; VI-NEXT: s_mov_b32 s17, s7
		; VI-NEXT: s_mov_b32 s18, s2
; VI-NEXT: s_mov_b32 s19, s3		; VI-NEXT: s_mov_b32 s19, s3
; VI-NEXT: s_mov_b32 s20, s8		; VI-NEXT: s_mov_b32 s20, s8
; VI-NEXT: s_mov_b32 s21, s9		; VI-NEXT: s_mov_b32 s21, s9
; VI-NEXT: s_mov_b32 s22, s2		; VI-NEXT: s_mov_b32 s22, s2
; VI-NEXT: s_mov_b32 s23, s3		; VI-NEXT: s_mov_b32 s23, s3
; VI-NEXT: s_mov_b32 s8, s10		; VI-NEXT: s_mov_b32 s8, s10
; VI-NEXT: s_mov_b32 s9, s11		; VI-NEXT: s_mov_b32 s9, s11
; VI-NEXT: s_mov_b32 s10, s2		; VI-NEXT: s_mov_b32 s10, s2
; VI-NEXT: s_mov_b32 s11, s3		; VI-NEXT: s_mov_b32 s11, s3
; VI-NEXT: s_mov_b32 s14, s2		; VI-NEXT: s_mov_b32 s14, s2
; VI-NEXT: s_mov_b32 s15, s3		; VI-NEXT: s_mov_b32 s15, s3
; VI-NEXT: buffer_load_ushort v0, off, s[16:19], 0 glc		; VI-NEXT: buffer_load_ushort v0, off, s[16:19], 0 glc
; VI-NEXT: s_waitcnt vmcnt(0)		; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_load_ushort v1, off, s[20:23], 0 glc		; VI-NEXT: buffer_load_ushort v1, off, s[20:23], 0 glc
; VI-NEXT: s_waitcnt vmcnt(0)		; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_load_ushort v2, off, s[8:11], 0 glc		; VI-NEXT: buffer_load_ushort v2, off, s[8:11], 0 glc
; VI-NEXT: s_waitcnt vmcnt(0)		; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_load_ushort v3, off, s[12:15], 0 glc		; VI-NEXT: buffer_load_ushort v3, off, s[12:15], 0 glc
; VI-NEXT: s_waitcnt vmcnt(0)		; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: s_mov_b32 s0, s4		; VI-NEXT: s_mov_b32 s0, s4
; VI-NEXT: s_mov_b32 s1, s5		; VI-NEXT: s_mov_b32 s1, s5
		; VI-NEXT: s_addc_u32 s25, s25, 0
; VI-NEXT: v_cmp_lt_f16_e32 vcc, v0, v1		; VI-NEXT: v_cmp_lt_f16_e32 vcc, v0, v1
; VI-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc		; VI-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
; VI-NEXT: buffer_store_short v0, off, s[0:3], 0		; VI-NEXT: buffer_store_short v0, off, s[0:3], 0
; VI-NEXT: s_endpgm		; VI-NEXT: s_endpgm
half addrspace(1)* %r,		half addrspace(1)* %r,
half addrspace(1)* %a,		half addrspace(1)* %a,
half addrspace(1)* %b,		half addrspace(1)* %b,
half addrspace(1)* %c,		half addrspace(1)* %c,
▲ Show 20 Lines • Show All 329 Lines • ▼ Show 20 Lines	entry:
%r.val = select i1 %fcmp, half %c.val, half 0xH3800		%r.val = select i1 %fcmp, half %c.val, half 0xH3800
store half %r.val, half addrspace(1)* %r		store half %r.val, half addrspace(1)* %r
ret void		ret void
}		}

define amdgpu_kernel void @select_v2f16(		define amdgpu_kernel void @select_v2f16(
; SI-LABEL: select_v2f16:		; SI-LABEL: select_v2f16:
; SI: ; %bb.0: ; %entry		; SI: ; %bb.0: ; %entry
		; SI-NEXT: s_mov_b32 s24, SCRATCH_RSRC_DWORD0
; SI-NEXT: s_load_dwordx8 s[4:11], s[0:1], 0x9		; SI-NEXT: s_load_dwordx8 s[4:11], s[0:1], 0x9
		; SI-NEXT: s_mov_b32 s25, SCRATCH_RSRC_DWORD1
		; SI-NEXT: s_mov_b32 s26, -1
; SI-NEXT: s_load_dwordx2 s[12:13], s[0:1], 0x11		; SI-NEXT: s_load_dwordx2 s[12:13], s[0:1], 0x11
		; SI-NEXT: s_mov_b32 s27, 0xe8f000
		; SI-NEXT: s_add_u32 s24, s24, s3
; SI-NEXT: s_mov_b32 s3, 0xf000		; SI-NEXT: s_mov_b32 s3, 0xf000
; SI-NEXT: s_mov_b32 s2, -1		; SI-NEXT: s_mov_b32 s2, -1
; SI-NEXT: s_mov_b32 s18, s2
; SI-NEXT: s_waitcnt lgkmcnt(0)		; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: s_mov_b32 s16, s6		; SI-NEXT: s_mov_b32 s16, s6
; SI-NEXT: s_mov_b32 s17, s7		; SI-NEXT: s_mov_b32 s17, s7
		; SI-NEXT: s_mov_b32 s18, s2
; SI-NEXT: s_mov_b32 s19, s3		; SI-NEXT: s_mov_b32 s19, s3
; SI-NEXT: s_mov_b32 s20, s8		; SI-NEXT: s_mov_b32 s20, s8
; SI-NEXT: s_mov_b32 s21, s9		; SI-NEXT: s_mov_b32 s21, s9
; SI-NEXT: s_mov_b32 s22, s2		; SI-NEXT: s_mov_b32 s22, s2
; SI-NEXT: s_mov_b32 s23, s3		; SI-NEXT: s_mov_b32 s23, s3
; SI-NEXT: s_mov_b32 s14, s2		; SI-NEXT: s_mov_b32 s14, s2
; SI-NEXT: s_mov_b32 s15, s3		; SI-NEXT: s_mov_b32 s15, s3
; SI-NEXT: buffer_load_dword v1, off, s[16:19], 0		; SI-NEXT: buffer_load_dword v1, off, s[16:19], 0
; SI-NEXT: s_mov_b32 s8, s10		; SI-NEXT: s_mov_b32 s8, s10
; SI-NEXT: s_mov_b32 s9, s11		; SI-NEXT: s_mov_b32 s9, s11
; SI-NEXT: buffer_load_dword v0, off, s[12:15], 0		; SI-NEXT: buffer_load_dword v0, off, s[12:15], 0
; SI-NEXT: s_mov_b32 s10, s2		; SI-NEXT: s_mov_b32 s10, s2
; SI-NEXT: s_mov_b32 s11, s3		; SI-NEXT: s_mov_b32 s11, s3
; SI-NEXT: buffer_load_dword v2, off, s[20:23], 0		; SI-NEXT: buffer_load_dword v2, off, s[20:23], 0
; SI-NEXT: buffer_load_dword v3, off, s[8:11], 0		; SI-NEXT: buffer_load_dword v3, off, s[8:11], 0
; SI-NEXT: s_mov_b32 s0, s4		; SI-NEXT: s_mov_b32 s0, s4
; SI-NEXT: s_mov_b32 s1, s5		; SI-NEXT: s_mov_b32 s1, s5
		; SI-NEXT: s_addc_u32 s25, s25, 0
; SI-NEXT: s_waitcnt vmcnt(3)		; SI-NEXT: s_waitcnt vmcnt(3)
; SI-NEXT: v_lshrrev_b32_e32 v5, 16, v1		; SI-NEXT: v_lshrrev_b32_e32 v5, 16, v1
; SI-NEXT: v_cvt_f32_f16_e32 v5, v5		; SI-NEXT: v_cvt_f32_f16_e32 v5, v5
; SI-NEXT: v_cvt_f32_f16_e32 v1, v1		; SI-NEXT: v_cvt_f32_f16_e32 v1, v1
; SI-NEXT: s_waitcnt vmcnt(2)		; SI-NEXT: s_waitcnt vmcnt(2)
; SI-NEXT: v_cvt_f32_f16_e32 v4, v0		; SI-NEXT: v_cvt_f32_f16_e32 v4, v0
; SI-NEXT: v_lshrrev_b32_e32 v0, 16, v0		; SI-NEXT: v_lshrrev_b32_e32 v0, 16, v0
; SI-NEXT: s_waitcnt vmcnt(1)		; SI-NEXT: s_waitcnt vmcnt(1)
Show All 13 Lines
; SI-NEXT: v_cvt_f16_f32_e32 v1, v1		; SI-NEXT: v_cvt_f16_f32_e32 v1, v1
; SI-NEXT: v_lshlrev_b32_e32 v0, 16, v0		; SI-NEXT: v_lshlrev_b32_e32 v0, 16, v0
; SI-NEXT: v_or_b32_e32 v0, v1, v0		; SI-NEXT: v_or_b32_e32 v0, v1, v0
; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0		; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT: s_endpgm		; SI-NEXT: s_endpgm
;		;
; VI-LABEL: select_v2f16:		; VI-LABEL: select_v2f16:
; VI: ; %bb.0: ; %entry		; VI: ; %bb.0: ; %entry
		; VI-NEXT: s_mov_b32 s24, SCRATCH_RSRC_DWORD0
		; VI-NEXT: s_mov_b32 s25, SCRATCH_RSRC_DWORD1
; VI-NEXT: s_load_dwordx8 s[4:11], s[0:1], 0x24		; VI-NEXT: s_load_dwordx8 s[4:11], s[0:1], 0x24
; VI-NEXT: s_load_dwordx2 s[12:13], s[0:1], 0x44		; VI-NEXT: s_load_dwordx2 s[12:13], s[0:1], 0x44
		; VI-NEXT: s_mov_b32 s26, -1
		; VI-NEXT: s_mov_b32 s27, 0xe80000
		; VI-NEXT: s_add_u32 s24, s24, s3
; VI-NEXT: s_mov_b32 s3, 0xf000		; VI-NEXT: s_mov_b32 s3, 0xf000
; VI-NEXT: s_mov_b32 s2, -1		; VI-NEXT: s_mov_b32 s2, -1
; VI-NEXT: s_mov_b32 s14, s2		; VI-NEXT: s_mov_b32 s14, s2
; VI-NEXT: s_mov_b32 s15, s3		; VI-NEXT: s_mov_b32 s15, s3
; VI-NEXT: s_waitcnt lgkmcnt(0)		; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: s_mov_b32 s16, s6		; VI-NEXT: s_mov_b32 s16, s6
; VI-NEXT: s_mov_b32 s17, s7		; VI-NEXT: s_mov_b32 s17, s7
; VI-NEXT: s_mov_b32 s18, s2		; VI-NEXT: s_mov_b32 s18, s2
; VI-NEXT: s_mov_b32 s19, s3		; VI-NEXT: s_mov_b32 s19, s3
; VI-NEXT: s_mov_b32 s20, s8		; VI-NEXT: s_mov_b32 s20, s8
; VI-NEXT: s_mov_b32 s21, s9		; VI-NEXT: s_mov_b32 s21, s9
; VI-NEXT: s_mov_b32 s22, s2		; VI-NEXT: s_mov_b32 s22, s2
; VI-NEXT: s_mov_b32 s23, s3		; VI-NEXT: s_mov_b32 s23, s3
; VI-NEXT: s_mov_b32 s8, s10		; VI-NEXT: s_mov_b32 s8, s10
; VI-NEXT: s_mov_b32 s9, s11		; VI-NEXT: s_mov_b32 s9, s11
; VI-NEXT: s_mov_b32 s10, s2		; VI-NEXT: s_mov_b32 s10, s2
; VI-NEXT: buffer_load_dword v0, off, s[12:15], 0		; VI-NEXT: buffer_load_dword v0, off, s[12:15], 0
; VI-NEXT: buffer_load_dword v1, off, s[20:23], 0		; VI-NEXT: buffer_load_dword v1, off, s[20:23], 0
; VI-NEXT: buffer_load_dword v2, off, s[16:19], 0		; VI-NEXT: buffer_load_dword v2, off, s[16:19], 0
; VI-NEXT: s_mov_b32 s11, s3		; VI-NEXT: s_mov_b32 s11, s3
; VI-NEXT: buffer_load_dword v3, off, s[8:11], 0		; VI-NEXT: buffer_load_dword v3, off, s[8:11], 0
; VI-NEXT: s_mov_b32 s0, s4		; VI-NEXT: s_mov_b32 s0, s4
; VI-NEXT: s_mov_b32 s1, s5		; VI-NEXT: s_mov_b32 s1, s5
		; VI-NEXT: s_addc_u32 s25, s25, 0
; VI-NEXT: s_waitcnt vmcnt(3)		; VI-NEXT: s_waitcnt vmcnt(3)
; VI-NEXT: v_lshrrev_b32_e32 v4, 16, v0		; VI-NEXT: v_lshrrev_b32_e32 v4, 16, v0
; VI-NEXT: s_waitcnt vmcnt(2)		; VI-NEXT: s_waitcnt vmcnt(2)
; VI-NEXT: v_lshrrev_b32_e32 v5, 16, v1		; VI-NEXT: v_lshrrev_b32_e32 v5, 16, v1
; VI-NEXT: s_waitcnt vmcnt(1)		; VI-NEXT: s_waitcnt vmcnt(1)
; VI-NEXT: v_lshrrev_b32_e32 v6, 16, v2		; VI-NEXT: v_lshrrev_b32_e32 v6, 16, v2
; VI-NEXT: v_cmp_lt_f16_e32 vcc, v2, v1		; VI-NEXT: v_cmp_lt_f16_e32 vcc, v2, v1
; VI-NEXT: s_waitcnt vmcnt(0)		; VI-NEXT: s_waitcnt vmcnt(0)
▲ Show 20 Lines • Show All 423 Lines • Show Last 20 Lines

llvm/test/CodeGen/AMDGPU/v_madak_f16.ll

Show First 20 Lines • Show All 61 Lines • ▼ Show 20 Lines	entry:

store half %r.val, half addrspace(1)* %r		store half %r.val, half addrspace(1)* %r
ret void		ret void
}		}

define amdgpu_kernel void @madak_f16_use_2(		define amdgpu_kernel void @madak_f16_use_2(
; SI-LABEL: madak_f16_use_2:		; SI-LABEL: madak_f16_use_2:
; SI: ; %bb.0: ; %entry		; SI: ; %bb.0: ; %entry
		; SI-NEXT: s_mov_b32 s20, SCRATCH_RSRC_DWORD0
; SI-NEXT: s_load_dwordx8 s[4:11], s[0:1], 0x9		; SI-NEXT: s_load_dwordx8 s[4:11], s[0:1], 0x9
		; SI-NEXT: s_mov_b32 s21, SCRATCH_RSRC_DWORD1
		; SI-NEXT: s_mov_b32 s22, -1
		; SI-NEXT: s_mov_b32 s23, 0xe8f000
; SI-NEXT: s_load_dwordx2 s[12:13], s[0:1], 0x11		; SI-NEXT: s_load_dwordx2 s[12:13], s[0:1], 0x11
		; SI-NEXT: s_add_u32 s20, s20, s3
; SI-NEXT: s_mov_b32 s3, 0xf000		; SI-NEXT: s_mov_b32 s3, 0xf000
; SI-NEXT: s_mov_b32 s2, -1		; SI-NEXT: s_mov_b32 s2, -1
; SI-NEXT: s_mov_b32 s18, s2
; SI-NEXT: s_waitcnt lgkmcnt(0)		; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: s_mov_b32 s16, s8		; SI-NEXT: s_mov_b32 s16, s8
; SI-NEXT: s_mov_b32 s17, s9		; SI-NEXT: s_mov_b32 s17, s9
		; SI-NEXT: s_mov_b32 s18, s2
; SI-NEXT: s_mov_b32 s19, s3		; SI-NEXT: s_mov_b32 s19, s3
; SI-NEXT: s_mov_b32 s8, s10		; SI-NEXT: s_mov_b32 s8, s10
; SI-NEXT: s_mov_b32 s9, s11		; SI-NEXT: s_mov_b32 s9, s11
; SI-NEXT: s_mov_b32 s10, s2		; SI-NEXT: s_mov_b32 s10, s2
; SI-NEXT: s_mov_b32 s11, s3		; SI-NEXT: s_mov_b32 s11, s3
; SI-NEXT: s_mov_b32 s14, s2		; SI-NEXT: s_mov_b32 s14, s2
; SI-NEXT: s_mov_b32 s15, s3		; SI-NEXT: s_mov_b32 s15, s3
; SI-NEXT: buffer_load_ushort v0, off, s[16:19], 0 glc		; SI-NEXT: buffer_load_ushort v0, off, s[16:19], 0 glc
; SI-NEXT: s_waitcnt vmcnt(0)		; SI-NEXT: s_waitcnt vmcnt(0)
; SI-NEXT: buffer_load_ushort v1, off, s[8:11], 0 glc		; SI-NEXT: buffer_load_ushort v1, off, s[8:11], 0 glc
; SI-NEXT: s_waitcnt vmcnt(0)		; SI-NEXT: s_waitcnt vmcnt(0)
; SI-NEXT: buffer_load_ushort v2, off, s[12:15], 0 glc		; SI-NEXT: buffer_load_ushort v2, off, s[12:15], 0 glc
; SI-NEXT: s_waitcnt vmcnt(0)		; SI-NEXT: s_waitcnt vmcnt(0)
; SI-NEXT: v_mov_b32_e32 v3, 0x41200000		; SI-NEXT: v_mov_b32_e32 v3, 0x41200000
; SI-NEXT: s_mov_b32 s0, s4		; SI-NEXT: s_mov_b32 s0, s4
; SI-NEXT: s_mov_b32 s1, s5		; SI-NEXT: s_mov_b32 s1, s5
		; SI-NEXT: s_addc_u32 s21, s21, 0
; SI-NEXT: s_mov_b32 s8, s6		; SI-NEXT: s_mov_b32 s8, s6
; SI-NEXT: s_mov_b32 s9, s7		; SI-NEXT: s_mov_b32 s9, s7
; SI-NEXT: v_cvt_f32_f16_e32 v0, v0		; SI-NEXT: v_cvt_f32_f16_e32 v0, v0
; SI-NEXT: v_cvt_f32_f16_e32 v1, v1		; SI-NEXT: v_cvt_f32_f16_e32 v1, v1
; SI-NEXT: v_cvt_f32_f16_e32 v2, v2		; SI-NEXT: v_cvt_f32_f16_e32 v2, v2
; SI-NEXT: v_madak_f32 v1, v0, v1, 0x41200000		; SI-NEXT: v_madak_f32 v1, v0, v1, 0x41200000
; SI-NEXT: v_mac_f32_e32 v3, v0, v2		; SI-NEXT: v_mac_f32_e32 v3, v0, v2
; SI-NEXT: v_cvt_f16_f32_e32 v0, v1		; SI-NEXT: v_cvt_f16_f32_e32 v0, v1
; SI-NEXT: v_cvt_f16_f32_e32 v1, v3		; SI-NEXT: v_cvt_f16_f32_e32 v1, v3
; SI-NEXT: buffer_store_short v0, off, s[0:3], 0		; SI-NEXT: buffer_store_short v0, off, s[0:3], 0
; SI-NEXT: buffer_store_short v1, off, s[8:11], 0		; SI-NEXT: buffer_store_short v1, off, s[8:11], 0
; SI-NEXT: s_endpgm		; SI-NEXT: s_endpgm
;		;
; VI-LABEL: madak_f16_use_2:		; VI-LABEL: madak_f16_use_2:
; VI: ; %bb.0: ; %entry		; VI: ; %bb.0: ; %entry
		; VI-NEXT: s_mov_b32 s20, SCRATCH_RSRC_DWORD0
; VI-NEXT: s_load_dwordx8 s[4:11], s[0:1], 0x24		; VI-NEXT: s_load_dwordx8 s[4:11], s[0:1], 0x24
		; VI-NEXT: s_mov_b32 s21, SCRATCH_RSRC_DWORD1
		; VI-NEXT: s_mov_b32 s22, -1
		; VI-NEXT: s_mov_b32 s23, 0xe80000
; VI-NEXT: s_load_dwordx2 s[12:13], s[0:1], 0x44		; VI-NEXT: s_load_dwordx2 s[12:13], s[0:1], 0x44
		; VI-NEXT: s_add_u32 s20, s20, s3
; VI-NEXT: s_mov_b32 s3, 0xf000		; VI-NEXT: s_mov_b32 s3, 0xf000
; VI-NEXT: s_mov_b32 s2, -1		; VI-NEXT: s_mov_b32 s2, -1
; VI-NEXT: s_mov_b32 s18, s2
; VI-NEXT: s_waitcnt lgkmcnt(0)		; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: s_mov_b32 s16, s8		; VI-NEXT: s_mov_b32 s16, s8
; VI-NEXT: s_mov_b32 s17, s9		; VI-NEXT: s_mov_b32 s17, s9
		; VI-NEXT: s_mov_b32 s18, s2
; VI-NEXT: s_mov_b32 s19, s3		; VI-NEXT: s_mov_b32 s19, s3
; VI-NEXT: s_mov_b32 s8, s10		; VI-NEXT: s_mov_b32 s8, s10
; VI-NEXT: s_mov_b32 s9, s11		; VI-NEXT: s_mov_b32 s9, s11
; VI-NEXT: s_mov_b32 s10, s2		; VI-NEXT: s_mov_b32 s10, s2
; VI-NEXT: s_mov_b32 s11, s3		; VI-NEXT: s_mov_b32 s11, s3
; VI-NEXT: s_mov_b32 s14, s2		; VI-NEXT: s_mov_b32 s14, s2
; VI-NEXT: s_mov_b32 s15, s3		; VI-NEXT: s_mov_b32 s15, s3
; VI-NEXT: buffer_load_ushort v0, off, s[16:19], 0 glc		; VI-NEXT: buffer_load_ushort v0, off, s[16:19], 0 glc
; VI-NEXT: s_waitcnt vmcnt(0)		; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_load_ushort v1, off, s[8:11], 0 glc		; VI-NEXT: buffer_load_ushort v1, off, s[8:11], 0 glc
; VI-NEXT: s_waitcnt vmcnt(0)		; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_load_ushort v2, off, s[12:15], 0 glc		; VI-NEXT: buffer_load_ushort v2, off, s[12:15], 0 glc
; VI-NEXT: s_waitcnt vmcnt(0)		; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: v_mov_b32_e32 v3, 0x4900		; VI-NEXT: v_mov_b32_e32 v3, 0x4900
; VI-NEXT: s_mov_b32 s0, s4		; VI-NEXT: s_mov_b32 s0, s4
; VI-NEXT: s_mov_b32 s1, s5		; VI-NEXT: s_mov_b32 s1, s5
		; VI-NEXT: s_addc_u32 s21, s21, 0
; VI-NEXT: s_mov_b32 s8, s6		; VI-NEXT: s_mov_b32 s8, s6
; VI-NEXT: s_mov_b32 s9, s7		; VI-NEXT: s_mov_b32 s9, s7
; VI-NEXT: v_madak_f16 v1, v0, v1, 0x4900		; VI-NEXT: v_madak_f16 v1, v0, v1, 0x4900
; VI-NEXT: v_mac_f16_e32 v3, v0, v2		; VI-NEXT: v_mac_f16_e32 v3, v0, v2
; VI-NEXT: buffer_store_short v1, off, s[0:3], 0		; VI-NEXT: buffer_store_short v1, off, s[0:3], 0
; VI-NEXT: buffer_store_short v3, off, s[8:11], 0		; VI-NEXT: buffer_store_short v3, off, s[8:11], 0
; VI-NEXT: s_endpgm		; VI-NEXT: s_endpgm
half addrspace(1)* %r0,		half addrspace(1)* %r0,
Show All 20 Lines

llvm/test/CodeGen/AMDGPU/waitcnt-bvh.mir

	# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py			# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
	# RUN: llc -march=amdgcn -mcpu=gfx1030 -run-pass=si-insert-waitcnts -verify-machineinstrs %s -o - \| FileCheck -check-prefix=GCN %s			# RUN: llc -march=amdgcn -mcpu=gfx1030 -run-pass=si-insert-waitcnts -verify-machineinstrs %s -o - \| FileCheck -check-prefix=GCN %s

	---			---
	name: waitcnt-check-inorder			name: waitcnt-check-inorder
	body: \|			body: \|
	bb.0:			bb.0:
	; GCN-LABEL: name: waitcnt-check-inorder			; GCN-LABEL: name: waitcnt-check-inorder
	; GCN: S_WAITCNT 0			; GCN: S_WAITCNT 0
	; GCN-NEXT: S_WAITCNT_VSCNT undef $sgpr_null, 0			; GCN-NEXT: S_WAITCNT_VSCNT undef $sgpr_null, 0
	; GCN-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_BVH_INTERSECT_RAY_sa_gfx10 killed $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19, renamable $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec :: (dereferenceable load (s128) from custom "ImageResource")			; GCN-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_BVH_INTERSECT_RAY_sa_gfx10 killed $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14, renamable $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec :: (dereferenceable load (s128) from custom "ImageResource")
	; GCN-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_BVH_INTERSECT_RAY_sa_gfx10 killed $vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec :: (dereferenceable load (s128) from custom "ImageResource")			; GCN-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_BVH_INTERSECT_RAY_sa_gfx10 killed $vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec :: (dereferenceable load (s128) from custom "ImageResource")
	; GCN-NEXT: S_ENDPGM 0			; GCN-NEXT: S_ENDPGM 0
	$vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_BVH_INTERSECT_RAY_sa_gfx10 killed $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19, renamable $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec :: (dereferenceable load (s128) from custom "ImageResource")			$vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_BVH_INTERSECT_RAY_sa_gfx10 killed $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14, renamable $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec :: (dereferenceable load (s128) from custom "ImageResource")
	$vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_BVH_INTERSECT_RAY_sa_gfx10 killed $vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec :: (dereferenceable load (s128) from custom "ImageResource")			$vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_BVH_INTERSECT_RAY_sa_gfx10 killed $vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec :: (dereferenceable load (s128) from custom "ImageResource")
	S_ENDPGM 0			S_ENDPGM 0
	...			...
	---			---
	name: waitcnt-check-vs-vmem			name: waitcnt-check-vs-vmem
	body: \|			body: \|
	bb.0:			bb.0:
	; GCN-LABEL: name: waitcnt-check-vs-vmem			; GCN-LABEL: name: waitcnt-check-vs-vmem
	; GCN: S_WAITCNT 0			; GCN: S_WAITCNT 0
	; GCN-NEXT: S_WAITCNT_VSCNT undef $sgpr_null, 0			; GCN-NEXT: S_WAITCNT_VSCNT undef $sgpr_null, 0
	; GCN-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_BVH_INTERSECT_RAY_sa_gfx10 killed $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19, renamable $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec :: (dereferenceable load (s128) from custom "ImageResource")			; GCN-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_BVH_INTERSECT_RAY_sa_gfx10 killed $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14, renamable $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec :: (dereferenceable load (s128) from custom "ImageResource")
	; GCN-NEXT: S_WAITCNT 16240			; GCN-NEXT: S_WAITCNT 16240
	; GCN-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFEN $vgpr16, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, implicit $exec			; GCN-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFEN $vgpr16, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, implicit $exec
	; GCN-NEXT: S_ENDPGM 0			; GCN-NEXT: S_ENDPGM 0
	$vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_BVH_INTERSECT_RAY_sa_gfx10 killed $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19, renamable $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec :: (dereferenceable load (s128) from custom "ImageResource")			$vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_BVH_INTERSECT_RAY_sa_gfx10 killed $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14, renamable $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec :: (dereferenceable load (s128) from custom "ImageResource")
	$vgpr0 = BUFFER_LOAD_DWORD_OFFEN $vgpr16, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, implicit $exec			$vgpr0 = BUFFER_LOAD_DWORD_OFFEN $vgpr16, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, implicit $exec
	S_ENDPGM 0			S_ENDPGM 0
	...			...
	---			---
	name: waitcnt-check-vs-mimg-samp			name: waitcnt-check-vs-mimg-samp
	body: \|			body: \|
	bb.0:			bb.0:
	; GCN-LABEL: name: waitcnt-check-vs-mimg-samp			; GCN-LABEL: name: waitcnt-check-vs-mimg-samp
	; GCN: S_WAITCNT 0			; GCN: S_WAITCNT 0
	; GCN-NEXT: S_WAITCNT_VSCNT undef $sgpr_null, 0			; GCN-NEXT: S_WAITCNT_VSCNT undef $sgpr_null, 0
	; GCN-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_BVH_INTERSECT_RAY_sa_gfx10 killed $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec :: (dereferenceable load (s128) from custom "ImageResource")			; GCN-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_BVH_INTERSECT_RAY_sa_gfx10 killed $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec :: (dereferenceable load (s128) from custom "ImageResource")
	; GCN-NEXT: S_WAITCNT 16240			; GCN-NEXT: S_WAITCNT 16240
	; GCN-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_SAMPLE_V4_V2 $vgpr20_vgpr21, $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11, $sgpr0_sgpr1_sgpr2_sgpr3, 15, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4)			; GCN-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_SAMPLE_V4_V2 $vgpr20_vgpr21, $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11, $sgpr0_sgpr1_sgpr2_sgpr3, 15, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4)
	; GCN-NEXT: S_ENDPGM 0			; GCN-NEXT: S_ENDPGM 0
	$vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_BVH_INTERSECT_RAY_sa_gfx10 killed $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec :: (dereferenceable load (s128) from custom "ImageResource")			$vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_BVH_INTERSECT_RAY_sa_gfx10 killed $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec :: (dereferenceable load (s128) from custom "ImageResource")
	$vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_SAMPLE_V4_V2 $vgpr20_vgpr21, $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11, $sgpr0_sgpr1_sgpr2_sgpr3, 15, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4)			$vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_SAMPLE_V4_V2 $vgpr20_vgpr21, $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11, $sgpr0_sgpr1_sgpr2_sgpr3, 15, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4)
	S_ENDPGM 0			S_ENDPGM 0
	...			...
	---			---
	name: waitcnt-check-vs-vmem-reverse			name: waitcnt-check-vs-vmem-reverse
	body: \|			body: \|
	bb.0:			bb.0:
	; GCN-LABEL: name: waitcnt-check-vs-vmem-reverse			; GCN-LABEL: name: waitcnt-check-vs-vmem-reverse
	; GCN: S_WAITCNT 0			; GCN: S_WAITCNT 0
	; GCN-NEXT: S_WAITCNT_VSCNT undef $sgpr_null, 0			; GCN-NEXT: S_WAITCNT_VSCNT undef $sgpr_null, 0
	; GCN-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFEN $vgpr20, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, implicit $exec			; GCN-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFEN $vgpr20, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, implicit $exec
	; GCN-NEXT: S_WAITCNT 16240			; GCN-NEXT: S_WAITCNT 16240
	; GCN-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_BVH_INTERSECT_RAY_sa_gfx10 killed $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec :: (dereferenceable load (s128) from custom "ImageResource")			; GCN-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_BVH_INTERSECT_RAY_sa_gfx10 killed $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec :: (dereferenceable load (s128) from custom "ImageResource")
	; GCN-NEXT: S_ENDPGM 0			; GCN-NEXT: S_ENDPGM 0
	$vgpr0 = BUFFER_LOAD_DWORD_OFFEN $vgpr20, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, implicit $exec			$vgpr0 = BUFFER_LOAD_DWORD_OFFEN $vgpr20, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, implicit $exec
	$vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_BVH_INTERSECT_RAY_sa_gfx10 killed $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec :: (dereferenceable load (s128) from custom "ImageResource")			$vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_BVH_INTERSECT_RAY_sa_gfx10 killed $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec :: (dereferenceable load (s128) from custom "ImageResource")
	S_ENDPGM 0			S_ENDPGM 0
	...			...
	---			---
	name: waitcnt-check-vs-mimg-samp-reverse			name: waitcnt-check-vs-mimg-samp-reverse
	body: \|			body: \|
	bb.0:			bb.0:
	; GCN-LABEL: name: waitcnt-check-vs-mimg-samp-reverse			; GCN-LABEL: name: waitcnt-check-vs-mimg-samp-reverse
	; GCN: S_WAITCNT 0			; GCN: S_WAITCNT 0
	; GCN-NEXT: S_WAITCNT_VSCNT undef $sgpr_null, 0			; GCN-NEXT: S_WAITCNT_VSCNT undef $sgpr_null, 0
	; GCN-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_SAMPLE_V4_V2 $vgpr16_vgpr17, $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11, $sgpr0_sgpr1_sgpr2_sgpr3, 15, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4)			; GCN-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_SAMPLE_V4_V2 $vgpr16_vgpr17, $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11, $sgpr0_sgpr1_sgpr2_sgpr3, 15, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4)
	; GCN-NEXT: S_WAITCNT 16240			; GCN-NEXT: S_WAITCNT 16240
	; GCN-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_BVH_INTERSECT_RAY_sa_gfx10 killed $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec :: (dereferenceable load (s128) from custom "ImageResource")			; GCN-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_BVH_INTERSECT_RAY_sa_gfx10 killed $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec :: (dereferenceable load (s128) from custom "ImageResource")
	; GCN-NEXT: S_ENDPGM 0			; GCN-NEXT: S_ENDPGM 0
	$vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_SAMPLE_V4_V2 $vgpr16_vgpr17, $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11, $sgpr0_sgpr1_sgpr2_sgpr3, 15, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4)			$vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_SAMPLE_V4_V2 $vgpr16_vgpr17, $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11, $sgpr0_sgpr1_sgpr2_sgpr3, 15, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4)
	$vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_BVH_INTERSECT_RAY_sa_gfx10 killed $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec :: (dereferenceable load (s128) from custom "ImageResource")			$vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_BVH_INTERSECT_RAY_sa_gfx10 killed $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec :: (dereferenceable load (s128) from custom "ImageResource")
	S_ENDPGM 0			S_ENDPGM 0
	...			...

llvm/test/MC/AMDGPU/gfx1013.s

	// RUN: llvm-mc -arch=amdgcn -mcpu=gfx1013 -mattr=+wavefrontsize32,-wavefrontsize64 -show-encoding %s \| FileCheck %s			// RUN: llvm-mc -arch=amdgcn -mcpu=gfx1013 -mattr=+wavefrontsize32,-wavefrontsize64 -show-encoding %s \| FileCheck %s

	image_bvh64_intersect_ray v[5:8], v[1:16], s[8:11]			image_bvh64_intersect_ray v[5:8], v[1:12], s[8:11]
	// CHECK: [0x01,0x9f,0x9c,0xf1,0x01,0x05,0x02,0x00]			// CHECK: [0x01,0x9f,0x9c,0xf1,0x01,0x05,0x02,0x00]

	image_bvh64_intersect_ray v[5:8], v[240:255], s[8:11] a16			image_bvh64_intersect_ray v[5:8], v[247:255], s[8:11] a16
	// CHECK: [0x01,0x9f,0x9c,0xf1,0xf0,0x05,0x02,0x40]			// CHECK: [0x01,0x9f,0x9c,0xf1,0xf7,0x05,0x02,0x40]

	image_bvh64_intersect_ray v[5:8], v[1:16], ttmp[12:15]			image_bvh64_intersect_ray v[5:8], v[1:12], ttmp[12:15]
	// CHECK: [0x01,0x9f,0x9c,0xf1,0x01,0x05,0x1e,0x00]			// CHECK: [0x01,0x9f,0x9c,0xf1,0x01,0x05,0x1e,0x00]

	image_bvh64_intersect_ray v[39:42], [v50, v46, v23, v17, v16, v15, v21, v20, v19, v37, v40, v42], s[12:15]			image_bvh64_intersect_ray v[39:42], [v50, v46, v23, v17, v16, v15, v21, v20, v19, v37, v40, v42], s[12:15]
	// CHECK: encoding: [0x07,0x9f,0x9c,0xf1,0x32,0x27,0x03,0x00,0x2e,0x17,0x11,0x10,0x0f,0x15,0x14,0x13,0x25,0x28,0x2a,0x00]			// CHECK: [0x07,0x9f,0x9c,0xf1,0x32,0x27,0x03,0x00,0x2e,0x17,0x11,0x10,0x0f,0x15,0x14,0x13,0x25,0x28,0x2a,0x00]

	image_bvh_intersect_ray v[252:255], v[1:16], s[8:11]			image_bvh_intersect_ray v[252:255], v[1:11], s[8:11]
	// CHECK: [0x01,0x9f,0x98,0xf1,0x01,0xfc,0x02,0x00]			// CHECK: [0x01,0x9f,0x98,0xf1,0x01,0xfc,0x02,0x00]

	image_bvh_intersect_ray v[5:8], v[248:255], s[8:11] a16			image_bvh_intersect_ray v[5:8], v[248:255], s[8:11] a16
	// CHECK: [0x01,0x9f,0x98,0xf1,0xf8,0x05,0x02,0x40]			// CHECK: [0x01,0x9f,0x98,0xf1,0xf8,0x05,0x02,0x40]

	image_bvh_intersect_ray v[5:8], v[1:16], ttmp[12:15]			image_bvh_intersect_ray v[5:8], v[1:11], ttmp[12:15]
	// CHECK: [0x01,0x9f,0x98,0xf1,0x01,0x05,0x1e,0x00]			// CHECK: [0x01,0x9f,0x98,0xf1,0x01,0x05,0x1e,0x00]

	image_bvh_intersect_ray v[39:42], [v50, v46, v23, v17, v16, v15, v21, v20, v19, v37, v40], s[12:15]			image_bvh_intersect_ray v[39:42], [v50, v46, v23, v17, v16, v15, v21, v20, v19, v37, v40], s[12:15]
	// CHECK: encoding: [0x07,0x9f,0x98,0xf1,0x32,0x27,0x03,0x00,0x2e,0x17,0x11,0x10,0x0f,0x15,0x14,0x13,0x25,0x28,0x00,0x00]			// CHECK: [0x07,0x9f,0x98,0xf1,0x32,0x27,0x03,0x00,0x2e,0x17,0x11,0x10,0x0f,0x15,0x14,0x13,0x25,0x28,0x00,0x00]

	image_msaa_load v[5:6], v[1:4], s[8:15] dmask:0x3 dim:SQ_RSRC_IMG_2D_MSAA_ARRAY			image_msaa_load v[5:6], v[1:4], s[8:15] dmask:0x3 dim:SQ_RSRC_IMG_2D_MSAA_ARRAY
	// CHECK: [0x39,0x03,0x00,0xf0,0x01,0x05,0x02,0x00]			// CHECK: [0x39,0x03,0x00,0xf0,0x01,0x05,0x02,0x00]

	image_msaa_load v[5:7], v[1:4], s[8:15] dmask:0x3 dim:SQ_RSRC_IMG_2D_MSAA_ARRAY tfe			image_msaa_load v[5:7], v[1:4], s[8:15] dmask:0x3 dim:SQ_RSRC_IMG_2D_MSAA_ARRAY tfe
	// CHECK: [0x39,0x03,0x01,0xf0,0x01,0x05,0x02,0x00]			// CHECK: [0x39,0x03,0x01,0xf0,0x01,0x05,0x02,0x00]

	image_msaa_load v5, v[1:4], s[8:15] dmask:0x8 dim:SQ_RSRC_IMG_2D_MSAA_ARRAY d16			image_msaa_load v5, v[1:4], s[8:15] dmask:0x8 dim:SQ_RSRC_IMG_2D_MSAA_ARRAY d16
	Show All 22 Lines

llvm/test/MC/AMDGPU/gfx1030_new.s

	Show First 20 Lines • Show All 78 Lines • ▼ Show 20 Lines
	// GFX10: encoding: [0x01,0x05,0x00,0x0c]			// GFX10: encoding: [0x01,0x05,0x00,0x0c]

	v_fmac_legacy_f32 v0, \|v1\|, -v2			v_fmac_legacy_f32 v0, \|v1\|, -v2
	// GFX10: encoding: [0x00,0x01,0x06,0xd5,0x01,0x05,0x02,0x40]			// GFX10: encoding: [0x00,0x01,0x06,0xd5,0x01,0x05,0x02,0x40]

	v_fmac_legacy_f32 v0, s1, 2.0			v_fmac_legacy_f32 v0, s1, 2.0
	// GFX10: encoding: [0x00,0x00,0x06,0xd5,0x01,0xe8,0x01,0x00]			// GFX10: encoding: [0x00,0x00,0x06,0xd5,0x01,0xe8,0x01,0x00]

	image_bvh_intersect_ray v[4:7], v[9:24], s[4:7]			image_bvh_intersect_ray v[4:7], v[9:19], s[4:7]
	// GFX10: encoding: [0x01,0x9f,0x98,0xf1,0x09,0x04,0x01,0x00]			// GFX10: encoding: [0x01,0x9f,0x98,0xf1,0x09,0x04,0x01,0x00]

	image_bvh_intersect_ray v[4:7], v[9:16], s[4:7] a16			image_bvh_intersect_ray v[4:7], v[9:16], s[4:7] a16
	// GFX10: encoding: [0x01,0x9f,0x98,0xf1,0x09,0x04,0x01,0x40]			// GFX10: encoding: [0x01,0x9f,0x98,0xf1,0x09,0x04,0x01,0x40]

	image_bvh64_intersect_ray v[4:7], v[9:24], s[4:7]			image_bvh64_intersect_ray v[4:7], v[9:20], s[4:7]
	// GFX10: encoding: [0x01,0x9f,0x9c,0xf1,0x09,0x04,0x01,0x00]			// GFX10: encoding: [0x01,0x9f,0x9c,0xf1,0x09,0x04,0x01,0x00]

	image_bvh64_intersect_ray v[4:7], v[9:24], s[4:7] a16			image_bvh64_intersect_ray v[4:7], v[9:17], s[4:7] a16
	// GFX10: encoding: [0x01,0x9f,0x9c,0xf1,0x09,0x04,0x01,0x40]			// GFX10: encoding: [0x01,0x9f,0x9c,0xf1,0x09,0x04,0x01,0x40]

	image_bvh_intersect_ray v[39:42], [v50, v46, v23, v17, v16, v15, v21, v20, v19, v37, v40], s[12:15]			image_bvh_intersect_ray v[39:42], [v50, v46, v23, v17, v16, v15, v21, v20, v19, v37, v40], s[12:15]
	// GFX10: encoding: [0x07,0x9f,0x98,0xf1,0x32,0x27,0x03,0x00,0x2e,0x17,0x11,0x10,0x0f,0x15,0x14,0x13,0x25,0x28,0x00,0x00]			// GFX10: encoding: [0x07,0x9f,0x98,0xf1,0x32,0x27,0x03,0x00,0x2e,0x17,0x11,0x10,0x0f,0x15,0x14,0x13,0x25,0x28,0x00,0x00]

	image_bvh_intersect_ray v[39:42], [v50, v46, v23, v17, v16, v15, v21, v20], s[12:15] a16			image_bvh_intersect_ray v[39:42], [v50, v46, v23, v17, v16, v15, v21, v20], s[12:15] a16
	// GFX10: encoding: [0x05,0x9f,0x98,0xf1,0x32,0x27,0x03,0x40,0x2e,0x17,0x11,0x10,0x0f,0x15,0x14,0x00]			// GFX10: encoding: [0x05,0x9f,0x98,0xf1,0x32,0x27,0x03,0x40,0x2e,0x17,0x11,0x10,0x0f,0x15,0x14,0x00]

	▲ Show 20 Lines • Show All 117 Lines • Show Last 20 Lines

llvm/test/MC/AMDGPU/gfx10_asm_mimg.s

	Show First 20 Lines • Show All 292 Lines • ▼ Show 20 Lines
	; GFX10: image_sample_d v[64:66], [v32, v16, v8, v4, v2, v1], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_2D ; encoding: [0x0c,0x07,0x88,0xf0,0x20,0x40,0x21,0x03,0x10,0x08,0x04,0x02,0x01,0x00,0x00,0x00]			; GFX10: image_sample_d v[64:66], [v32, v16, v8, v4, v2, v1], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_2D ; encoding: [0x0c,0x07,0x88,0xf0,0x20,0x40,0x21,0x03,0x10,0x08,0x04,0x02,0x01,0x00,0x00,0x00]

	image_sample_d v[64:66], v[32:37], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_2D			image_sample_d v[64:66], v[32:37], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_2D
	; GFX10: image_sample_d v[64:66], v[32:37], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_2D ; encoding: [0x08,0x07,0x88,0xf0,0x20,0x40,0x21,0x03]			; GFX10: image_sample_d v[64:66], v[32:37], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_2D ; encoding: [0x08,0x07,0x88,0xf0,0x20,0x40,0x21,0x03]

	image_sample_d v[64:66], [v32, v16, v8, v4, v2, v1, v0, v20, v21], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_3D			image_sample_d v[64:66], [v32, v16, v8, v4, v2, v1, v0, v20, v21], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_3D
	; GFX10: image_sample_d v[64:66], [v32, v16, v8, v4, v2, v1, v0, v20, v21], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_3D ; encoding: [0x14,0x07,0x88,0xf0,0x20,0x40,0x21,0x03,0x10,0x08,0x04,0x02,0x01,0x00,0x14,0x15]			; GFX10: image_sample_d v[64:66], [v32, v16, v8, v4, v2, v1, v0, v20, v21], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_3D ; encoding: [0x14,0x07,0x88,0xf0,0x20,0x40,0x21,0x03,0x10,0x08,0x04,0x02,0x01,0x00,0x14,0x15]

	image_sample_d v[64:66], v[32:47], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_3D			image_sample_d v[64:66], v[32:40], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_3D
	; GFX10: image_sample_d v[64:66], v[32:47], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_3D ; encoding: [0x10,0x07,0x88,0xf0,0x20,0x40,0x21,0x03]			; GFX10: image_sample_d v[64:66], v[32:40], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_3D ; encoding: [0x10,0x07,0x88,0xf0,0x20,0x40,0x21,0x03]

	image_sample_d v[64:66], [v32, v16, v8, v4, v2, v1, v5], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_CUBE			image_sample_d v[64:66], [v32, v16, v8, v4, v2, v1, v5], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_CUBE
	; GFX10: image_sample_d v[64:66], [v32, v16, v8, v4, v2, v1, v5], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_CUBE ; encoding: [0x1c,0x07,0x88,0xf0,0x20,0x40,0x21,0x03,0x10,0x08,0x04,0x02,0x01,0x05,0x00,0x00]			; GFX10: image_sample_d v[64:66], [v32, v16, v8, v4, v2, v1, v5], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_CUBE ; encoding: [0x1c,0x07,0x88,0xf0,0x20,0x40,0x21,0x03,0x10,0x08,0x04,0x02,0x01,0x05,0x00,0x00]

	image_sample_d v[64:66], [v32, v16, v8, v4], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_1D_ARRAY			image_sample_d v[64:66], [v32, v16, v8, v4], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_1D_ARRAY
	; GFX10: image_sample_d v[64:66], [v32, v16, v8, v4], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_1D_ARRAY ; encoding: [0x22,0x07,0x88,0xf0,0x20,0x40,0x21,0x03,0x10,0x08,0x04,0x00]			; GFX10: image_sample_d v[64:66], [v32, v16, v8, v4], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_1D_ARRAY ; encoding: [0x22,0x07,0x88,0xf0,0x20,0x40,0x21,0x03,0x10,0x08,0x04,0x00]

	image_sample_d v[64:66], [v32, v16, v8, v4, v0, v20, v21], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_2D_ARRAY			image_sample_d v[64:66], [v32, v16, v8, v4, v0, v20, v21], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_2D_ARRAY
	▲ Show 20 Lines • Show All 346 Lines • Show Last 20 Lines

llvm/test/MC/AMDGPU/gfx10_unsupported.s

	Show First 20 Lines • Show All 755 Lines • ▼ Show 20 Lines
	// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU			// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU

	global_store_d16_hi_b8 v1, v2, s[104:105]			global_store_d16_hi_b8 v1, v2, s[104:105]
	// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU			// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU

	global_store_dword_addtid v1, off offset:16 glc slc dlc			global_store_dword_addtid v1, off offset:16 glc slc dlc
	// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU			// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU

	image_bvh64_intersect_ray v[252:255], v[240:255], ttmp[12:15] a16			image_bvh64_intersect_ray v[252:255], v[247:255], ttmp[12:15] a16
	// GFX1010: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU			// GFX1010: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU

	image_bvh_intersect_ray v[252:255], v[1:16], s[8:11]			image_bvh_intersect_ray v[252:255], v[1:11], s[8:11]
	// GFX1010: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU			// GFX1010: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU

	image_msaa_load v14, [v204,v11,v14,v19], s[40:47] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA_ARRAY			image_msaa_load v14, [v204,v11,v14,v19], s[40:47] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA_ARRAY
	// GFX1010: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU			// GFX1010: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU

	lds_direct_load v1 wait_vdst:15			lds_direct_load v1 wait_vdst:15
	// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU			// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU

	▲ Show 20 Lines • Show All 2,537 Lines • Show Last 20 Lines

llvm/test/MC/AMDGPU/gfx11_asm_mimg.s

This file is larger than 256 KB, so syntax highlighting is disabled by default.

	Show First 20 Lines • Show All 1,242 Lines • ▼ Show 20 Lines
	// GFX11: [0x98,0x03,0x50,0xf0,0xfd,0x01,0x03,0x00]			// GFX11: [0x98,0x03,0x50,0xf0,0xfd,0x01,0x03,0x00]

	image_atomic_xor v[1:2], v[2:3], s[96:103] dmask:0x3 dim:SQ_RSRC_IMG_2D_MSAA unorm a16			image_atomic_xor v[1:2], v[2:3], s[96:103] dmask:0x3 dim:SQ_RSRC_IMG_2D_MSAA unorm a16
	// GFX11: [0x98,0x03,0x51,0xf0,0x02,0x01,0x18,0x00]			// GFX11: [0x98,0x03,0x51,0xf0,0x02,0x01,0x18,0x00]

	image_atomic_xor v[254:255], v[254:255], ttmp[8:15] dmask:0x3 dim:SQ_RSRC_IMG_2D_MSAA unorm glc slc dlc a16 lwe			image_atomic_xor v[254:255], v[254:255], ttmp[8:15] dmask:0x3 dim:SQ_RSRC_IMG_2D_MSAA unorm glc slc dlc a16 lwe
	// GFX11: [0x98,0x73,0x51,0xf0,0xfe,0xfe,0x5d,0x00]			// GFX11: [0x98,0x73,0x51,0xf0,0xfe,0xfe,0x5d,0x00]

	image_bvh64_intersect_ray v[5:8], v[1:16], s[8:11]			image_bvh64_intersect_ray v[5:8], v[1:12], s[8:11]
	// GFX11: [0x80,0x8f,0x68,0xf0,0x01,0x05,0x02,0x00]			// GFX11: [0x80,0x8f,0x68,0xf0,0x01,0x05,0x02,0x00]

	image_bvh64_intersect_ray v[5:8], v[240:255], s[8:11]			image_bvh64_intersect_ray v[5:8], v[244:255], s[8:11]
	// GFX11: [0x80,0x8f,0x68,0xf0,0xf0,0x05,0x02,0x00]			// GFX11: [0x80,0x8f,0x68,0xf0,0xf4,0x05,0x02,0x00]

	image_bvh64_intersect_ray v[5:8], v[1:16], s[100:103] a16			image_bvh64_intersect_ray v[5:8], v[1:9], s[100:103] a16
	// GFX11: [0x80,0x8f,0x69,0xf0,0x01,0x05,0x19,0x00]			// GFX11: [0x80,0x8f,0x69,0xf0,0x01,0x05,0x19,0x00]

	image_bvh64_intersect_ray v[252:255], v[240:255], ttmp[12:15] a16			image_bvh64_intersect_ray v[252:255], v[247:255], ttmp[12:15] a16
	// GFX11: [0x80,0x8f,0x69,0xf0,0xf0,0xfc,0x1e,0x00]			// GFX11: [0x80,0x8f,0x69,0xf0,0xf7,0xfc,0x1e,0x00]

	image_bvh_intersect_ray v[5:8], v[1:16], s[8:11]			image_bvh_intersect_ray v[5:8], v[1:11], s[8:11]
	// GFX11: [0x80,0x8f,0x64,0xf0,0x01,0x05,0x02,0x00]			// GFX11: [0x80,0x8f,0x64,0xf0,0x01,0x05,0x02,0x00]

	image_bvh_intersect_ray v[5:8], v[240:255], s[8:11]			image_bvh_intersect_ray v[5:8], v[245:255], s[8:11]
	// GFX11: [0x80,0x8f,0x64,0xf0,0xf0,0x05,0x02,0x00]			// GFX11: [0x80,0x8f,0x64,0xf0,0xf5,0x05,0x02,0x00]

	image_bvh_intersect_ray v[5:8], v[1:8], s[100:103] a16			image_bvh_intersect_ray v[5:8], v[1:8], s[100:103] a16
	// GFX11: [0x80,0x8f,0x65,0xf0,0x01,0x05,0x19,0x00]			// GFX11: [0x80,0x8f,0x65,0xf0,0x01,0x05,0x19,0x00]

	image_bvh_intersect_ray v[252:255], v[248:255], ttmp[12:15] a16			image_bvh_intersect_ray v[252:255], v[248:255], ttmp[12:15] a16
	// GFX11: [0x80,0x8f,0x65,0xf0,0xf8,0xfc,0x1e,0x00]			// GFX11: [0x80,0x8f,0x65,0xf0,0xf8,0xfc,0x1e,0x00]

	image_gather4 v[5:8], v[1:2], s[8:15], s[12:15] dmask:0x4 dim:SQ_RSRC_IMG_2D			image_gather4 v[5:8], v[1:2], s[8:15], s[12:15] dmask:0x4 dim:SQ_RSRC_IMG_2D
	▲ Show 20 Lines • Show All 1,983 Lines • ▼ Show 20 Lines
	// GFX11: [0x00,0x03,0x84,0xf0,0xfc,0x05,0x02,0x0c]			// GFX11: [0x00,0x03,0x84,0xf0,0xfc,0x05,0x02,0x0c]

	image_sample_c_d v[5:6], v[1:4], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_1D a16			image_sample_c_d v[5:6], v[1:4], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_1D a16
	// GFX11: [0x00,0x03,0x85,0xf0,0x01,0x05,0x02,0x0c]			// GFX11: [0x00,0x03,0x85,0xf0,0x01,0x05,0x02,0x0c]

	image_sample_c_d v[5:6], v[252:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_1D a16			image_sample_c_d v[5:6], v[252:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_1D a16
	// GFX11: [0x00,0x03,0x85,0xf0,0xfc,0x05,0x02,0x0c]			// GFX11: [0x00,0x03,0x85,0xf0,0xfc,0x05,0x02,0x0c]

	image_sample_c_d v[5:6], v[1:16], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D			image_sample_c_d v[5:6], v[1:10], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D
	// GFX11: [0x08,0x03,0x84,0xf0,0x01,0x05,0x02,0x0c]			// GFX11: [0x08,0x03,0x84,0xf0,0x01,0x05,0x02,0x0c]

	image_sample_c_d v[5:6], v[240:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D			image_sample_c_d v[5:6], v[240:249], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D
	// GFX11: [0x08,0x03,0x84,0xf0,0xf0,0x05,0x02,0x0c]			// GFX11: [0x08,0x03,0x84,0xf0,0xf0,0x05,0x02,0x0c]

	image_sample_c_d v[5:6], v[1:16], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D a16			image_sample_c_d v[5:6], v[1:9], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D a16
	// GFX11: [0x08,0x03,0x85,0xf0,0x01,0x05,0x02,0x0c]			// GFX11: [0x08,0x03,0x85,0xf0,0x01,0x05,0x02,0x0c]

	image_sample_c_d v[5:6], v[240:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D a16			image_sample_c_d v[5:6], v[241:249], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D a16
	// GFX11: [0x08,0x03,0x85,0xf0,0xf0,0x05,0x02,0x0c]			// GFX11: [0x08,0x03,0x85,0xf0,0xf1,0x05,0x02,0x0c]

	image_sample_c_d v[5:6], v[1:7], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_2D			image_sample_c_d v[5:6], v[1:7], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_2D
	// GFX11: [0x04,0x03,0x84,0xf0,0x01,0x05,0x02,0x0c]			// GFX11: [0x04,0x03,0x84,0xf0,0x01,0x05,0x02,0x0c]

	image_sample_c_d v[254:255], v[249:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_2D			image_sample_c_d v[254:255], v[249:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_2D
	// GFX11: [0x04,0x03,0x84,0xf0,0xf9,0xfe,0x02,0x0c]			// GFX11: [0x04,0x03,0x84,0xf0,0xf9,0xfe,0x02,0x0c]

	image_sample_c_d v[5:7], v[1:6], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_2D a16 tfe			image_sample_c_d v[5:7], v[1:6], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_2D a16 tfe
	▲ Show 20 Lines • Show All 45 Lines • ▼ Show 20 Lines
	// GFX11: [0x00,0x03,0x10,0xf1,0xfb,0x05,0x02,0x0c]			// GFX11: [0x00,0x03,0x10,0xf1,0xfb,0x05,0x02,0x0c]

	image_sample_c_d_cl v[5:6], v[1:4], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_1D a16			image_sample_c_d_cl v[5:6], v[1:4], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_1D a16
	// GFX11: [0x00,0x03,0x11,0xf1,0x01,0x05,0x02,0x0c]			// GFX11: [0x00,0x03,0x11,0xf1,0x01,0x05,0x02,0x0c]

	image_sample_c_d_cl v[5:6], v[252:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_1D a16			image_sample_c_d_cl v[5:6], v[252:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_1D a16
	// GFX11: [0x00,0x03,0x11,0xf1,0xfc,0x05,0x02,0x0c]			// GFX11: [0x00,0x03,0x11,0xf1,0xfc,0x05,0x02,0x0c]

	image_sample_c_d_cl v[5:6], v[1:16], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D			image_sample_c_d_cl v[5:6], v[1:11], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D
	// GFX11: [0x08,0x03,0x10,0xf1,0x01,0x05,0x02,0x0c]			// GFX11: [0x08,0x03,0x10,0xf1,0x01,0x05,0x02,0x0c]

	image_sample_c_d_cl v[5:6], v[240:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D			image_sample_c_d_cl v[5:6], v[241:251], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D
	// GFX11: [0x08,0x03,0x10,0xf1,0xf0,0x05,0x02,0x0c]			// GFX11: [0x08,0x03,0x10,0xf1,0xf1,0x05,0x02,0x0c]

	image_sample_c_d_cl v[5:6], v[1:16], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D a16			image_sample_c_d_cl v[5:6], v[1:9], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D a16
	// GFX11: [0x08,0x03,0x11,0xf1,0x01,0x05,0x02,0x0c]			// GFX11: [0x08,0x03,0x11,0xf1,0x01,0x05,0x02,0x0c]

	image_sample_c_d_cl v[5:6], v[240:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D a16			image_sample_c_d_cl v[5:6], v[241:249], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D a16
	// GFX11: [0x08,0x03,0x11,0xf1,0xf0,0x05,0x02,0x0c]			// GFX11: [0x08,0x03,0x11,0xf1,0xf1,0x05,0x02,0x0c]

	image_sample_c_d_cl v[5:6], v[1:8], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_2D			image_sample_c_d_cl v[5:6], v[1:8], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_2D
	// GFX11: [0x04,0x03,0x10,0xf1,0x01,0x05,0x02,0x0c]			// GFX11: [0x04,0x03,0x10,0xf1,0x01,0x05,0x02,0x0c]

	image_sample_c_d_cl v[254:255], v[248:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_2D			image_sample_c_d_cl v[254:255], v[248:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_2D
	// GFX11: [0x04,0x03,0x10,0xf1,0xf8,0xfe,0x02,0x0c]			// GFX11: [0x04,0x03,0x10,0xf1,0xf8,0xfe,0x02,0x0c]

	image_sample_c_d_cl v[5:7], v[1:7], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_2D a16 tfe			image_sample_c_d_cl v[5:7], v[1:7], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_2D a16 tfe
	// GFX11: [0x04,0x03,0x11,0xf1,0x01,0x05,0x22,0x0c]			// GFX11: [0x04,0x03,0x11,0xf1,0x01,0x05,0x22,0x0c]

	image_sample_c_d_cl v[253:255], v[249:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_2D a16 tfe			image_sample_c_d_cl v[253:255], v[249:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_2D a16 tfe
	// GFX11: [0x04,0x03,0x11,0xf1,0xf9,0xfd,0x22,0x0c]			// GFX11: [0x04,0x03,0x11,0xf1,0xf9,0xfd,0x22,0x0c]

	image_sample_c_d_cl v5, v[1:16], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_CUBE d16			image_sample_c_d_cl v5, v[1:9], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_CUBE d16
	// GFX11: [0x0c,0x03,0x12,0xf1,0x01,0x05,0x02,0x0c]			// GFX11: [0x0c,0x03,0x12,0xf1,0x01,0x05,0x02,0x0c]

	image_sample_c_d_cl v255, v[240:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_CUBE d16			image_sample_c_d_cl v255, v[241:249], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_CUBE d16
	// GFX11: [0x0c,0x03,0x12,0xf1,0xf0,0xff,0x02,0x0c]			// GFX11: [0x0c,0x03,0x12,0xf1,0xf1,0xff,0x02,0x0c]

	image_sample_c_d_cl v[5:6], v[1:7], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_CUBE a16 tfe d16			image_sample_c_d_cl v[5:6], v[1:7], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_CUBE a16 tfe d16
	// GFX11: [0x0c,0x03,0x13,0xf1,0x01,0x05,0x22,0x0c]			// GFX11: [0x0c,0x03,0x13,0xf1,0x01,0x05,0x22,0x0c]

	image_sample_c_d_cl v[254:255], v[249:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_CUBE a16 tfe d16			image_sample_c_d_cl v[254:255], v[249:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_CUBE a16 tfe d16
	// GFX11: [0x0c,0x03,0x13,0xf1,0xf9,0xfe,0x22,0x0c]			// GFX11: [0x0c,0x03,0x13,0xf1,0xf9,0xfe,0x22,0x0c]

	image_sample_c_d_cl v5, v[1:6], s[8:15], s[12:15] dmask:0x4 dim:SQ_RSRC_IMG_1D_ARRAY			image_sample_c_d_cl v5, v[1:6], s[8:15], s[12:15] dmask:0x4 dim:SQ_RSRC_IMG_1D_ARRAY
	// GFX11: [0x10,0x04,0x10,0xf1,0x01,0x05,0x02,0x0c]			// GFX11: [0x10,0x04,0x10,0xf1,0x01,0x05,0x02,0x0c]

	image_sample_c_d_cl v255, v[250:255], s[8:15], s[12:15] dmask:0x4 dim:SQ_RSRC_IMG_1D_ARRAY			image_sample_c_d_cl v255, v[250:255], s[8:15], s[12:15] dmask:0x4 dim:SQ_RSRC_IMG_1D_ARRAY
	// GFX11: [0x10,0x04,0x10,0xf1,0xfa,0xff,0x02,0x0c]			// GFX11: [0x10,0x04,0x10,0xf1,0xfa,0xff,0x02,0x0c]

	image_sample_c_d_cl v[5:6], v[1:5], s[8:15], s[12:15] dmask:0x4 dim:SQ_RSRC_IMG_1D_ARRAY a16 tfe			image_sample_c_d_cl v[5:6], v[1:5], s[8:15], s[12:15] dmask:0x4 dim:SQ_RSRC_IMG_1D_ARRAY a16 tfe
	// GFX11: [0x10,0x04,0x11,0xf1,0x01,0x05,0x22,0x0c]			// GFX11: [0x10,0x04,0x11,0xf1,0x01,0x05,0x22,0x0c]

	image_sample_c_d_cl v[254:255], v[251:255], s[8:15], s[12:15] dmask:0x4 dim:SQ_RSRC_IMG_1D_ARRAY a16 tfe			image_sample_c_d_cl v[254:255], v[251:255], s[8:15], s[12:15] dmask:0x4 dim:SQ_RSRC_IMG_1D_ARRAY a16 tfe
	// GFX11: [0x10,0x04,0x11,0xf1,0xfb,0xfe,0x22,0x0c]			// GFX11: [0x10,0x04,0x11,0xf1,0xfb,0xfe,0x22,0x0c]

	image_sample_c_d_cl v5, v[1:16], s[8:15], s[12:15] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY d16			image_sample_c_d_cl v5, v[1:9], s[8:15], s[12:15] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY d16
	// GFX11: [0x14,0x04,0x12,0xf1,0x01,0x05,0x02,0x0c]			// GFX11: [0x14,0x04,0x12,0xf1,0x01,0x05,0x02,0x0c]

	image_sample_c_d_cl v255, v[240:255], s[8:15], s[12:15] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY d16			image_sample_c_d_cl v255, v[241:249], s[8:15], s[12:15] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY d16
	// GFX11: [0x14,0x04,0x12,0xf1,0xf0,0xff,0x02,0x0c]			// GFX11: [0x14,0x04,0x12,0xf1,0xf1,0xff,0x02,0x0c]

	image_sample_c_d_cl v[5:6], v[1:7], s[96:103], s[100:103] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY a16 tfe d16			image_sample_c_d_cl v[5:6], v[1:7], s[96:103], s[100:103] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY a16 tfe d16
	// GFX11: [0x14,0x04,0x13,0xf1,0x01,0x05,0x38,0x64]			// GFX11: [0x14,0x04,0x13,0xf1,0x01,0x05,0x38,0x64]

	image_sample_c_d_cl v[254:255], v[249:255], ttmp[8:15], ttmp[12:15] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY unorm glc slc dlc a16 tfe lwe d16			image_sample_c_d_cl v[254:255], v[249:255], ttmp[8:15], ttmp[12:15] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY unorm glc slc dlc a16 tfe lwe d16
	// GFX11: [0x94,0x74,0x13,0xf1,0xf9,0xfe,0x7d,0x78]			// GFX11: [0x94,0x74,0x13,0xf1,0xf9,0xfe,0x7d,0x78]

	image_sample_c_d_cl_g16 v[5:6], v[1:5], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_1D			image_sample_c_d_cl_g16 v[5:6], v[1:5], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_1D
	// GFX11: [0x00,0x03,0x50,0xf1,0x01,0x05,0x02,0x0c]			// GFX11: [0x00,0x03,0x50,0xf1,0x01,0x05,0x02,0x0c]

	image_sample_c_d_cl_g16 v[5:6], v[251:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_1D			image_sample_c_d_cl_g16 v[5:6], v[251:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_1D
	// GFX11: [0x00,0x03,0x50,0xf1,0xfb,0x05,0x02,0x0c]			// GFX11: [0x00,0x03,0x50,0xf1,0xfb,0x05,0x02,0x0c]

	image_sample_c_d_cl_g16 v[5:6], v[1:4], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_1D a16			image_sample_c_d_cl_g16 v[5:6], v[1:4], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_1D a16
	// GFX11: [0x00,0x03,0x51,0xf1,0x01,0x05,0x02,0x0c]			// GFX11: [0x00,0x03,0x51,0xf1,0x01,0x05,0x02,0x0c]

	image_sample_c_d_cl_g16 v[5:6], v[252:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_1D a16			image_sample_c_d_cl_g16 v[5:6], v[252:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_1D a16
	// GFX11: [0x00,0x03,0x51,0xf1,0xfc,0x05,0x02,0x0c]			// GFX11: [0x00,0x03,0x51,0xf1,0xfc,0x05,0x02,0x0c]

	image_sample_c_d_cl_g16 v[5:6], v[1:16], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D			image_sample_c_d_cl_g16 v[5:6], v[1:9], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D
	// GFX11: [0x08,0x03,0x50,0xf1,0x01,0x05,0x02,0x0c]			// GFX11: [0x08,0x03,0x50,0xf1,0x01,0x05,0x02,0x0c]

	image_sample_c_d_cl_g16 v[5:6], v[240:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D			image_sample_c_d_cl_g16 v[5:6], v[241:249], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D
	// GFX11: [0x08,0x03,0x50,0xf1,0xf0,0x05,0x02,0x0c]			// GFX11: [0x08,0x03,0x50,0xf1,0xf1,0x05,0x02,0x0c]

	image_sample_c_d_cl_g16 v[5:6], v[1:7], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D a16			image_sample_c_d_cl_g16 v[5:6], v[1:7], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D a16
	// GFX11: [0x08,0x03,0x51,0xf1,0x01,0x05,0x02,0x0c]			// GFX11: [0x08,0x03,0x51,0xf1,0x01,0x05,0x02,0x0c]

	image_sample_c_d_cl_g16 v[5:6], v[249:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D a16			image_sample_c_d_cl_g16 v[5:6], v[249:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D a16
	// GFX11: [0x08,0x03,0x51,0xf1,0xf9,0x05,0x02,0x0c]			// GFX11: [0x08,0x03,0x51,0xf1,0xf9,0x05,0x02,0x0c]

	image_sample_c_d_cl_g16 v[5:6], v[1:6], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_2D			image_sample_c_d_cl_g16 v[5:6], v[1:6], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_2D
	▲ Show 20 Lines • Show All 51 Lines • ▼ Show 20 Lines
	// GFX11: [0x00,0x03,0x28,0xf1,0xfa,0x05,0x02,0x0c]			// GFX11: [0x00,0x03,0x28,0xf1,0xfa,0x05,0x02,0x0c]

	image_sample_c_d_cl_o v[5:6], v[1:5], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_1D a16			image_sample_c_d_cl_o v[5:6], v[1:5], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_1D a16
	// GFX11: [0x00,0x03,0x29,0xf1,0x01,0x05,0x02,0x0c]			// GFX11: [0x00,0x03,0x29,0xf1,0x01,0x05,0x02,0x0c]

	image_sample_c_d_cl_o v[5:6], v[251:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_1D a16			image_sample_c_d_cl_o v[5:6], v[251:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_1D a16
	// GFX11: [0x00,0x03,0x29,0xf1,0xfb,0x05,0x02,0x0c]			// GFX11: [0x00,0x03,0x29,0xf1,0xfb,0x05,0x02,0x0c]

	image_sample_c_d_cl_o v[5:6], v[1:16], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D			image_sample_c_d_cl_o v[5:6], v[1:12], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D
	// GFX11: [0x08,0x03,0x28,0xf1,0x01,0x05,0x02,0x0c]			// GFX11: [0x08,0x03,0x28,0xf1,0x01,0x05,0x02,0x0c]

	image_sample_c_d_cl_o v[5:6], v[240:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D			image_sample_c_d_cl_o v[5:6], v[240:251], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D
	// GFX11: [0x08,0x03,0x28,0xf1,0xf0,0x05,0x02,0x0c]			// GFX11: [0x08,0x03,0x28,0xf1,0xf0,0x05,0x02,0x0c]

	image_sample_c_d_cl_o v[5:6], v[1:16], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D a16			image_sample_c_d_cl_o v[5:6], v[1:10], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D a16
	// GFX11: [0x08,0x03,0x29,0xf1,0x01,0x05,0x02,0x0c]			// GFX11: [0x08,0x03,0x29,0xf1,0x01,0x05,0x02,0x0c]

	image_sample_c_d_cl_o v[5:6], v[240:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D a16			image_sample_c_d_cl_o v[5:6], v[240:249], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D a16
	// GFX11: [0x08,0x03,0x29,0xf1,0xf0,0x05,0x02,0x0c]			// GFX11: [0x08,0x03,0x29,0xf1,0xf0,0x05,0x02,0x0c]

	image_sample_c_d_cl_o v[5:6], v[1:16], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_2D			image_sample_c_d_cl_o v[5:6], v[1:9], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_2D
	// GFX11: [0x04,0x03,0x28,0xf1,0x01,0x05,0x02,0x0c]			// GFX11: [0x04,0x03,0x28,0xf1,0x01,0x05,0x02,0x0c]

	image_sample_c_d_cl_o v[254:255], v[240:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_2D			image_sample_c_d_cl_o v[254:255], v[241:249], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_2D
	// GFX11: [0x04,0x03,0x28,0xf1,0xf0,0xfe,0x02,0x0c]			// GFX11: [0x04,0x03,0x28,0xf1,0xf1,0xfe,0x02,0x0c]

	image_sample_c_d_cl_o v[5:7], v[1:8], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_2D a16 tfe			image_sample_c_d_cl_o v[5:7], v[1:8], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_2D a16 tfe
	// GFX11: [0x04,0x03,0x29,0xf1,0x01,0x05,0x22,0x0c]			// GFX11: [0x04,0x03,0x29,0xf1,0x01,0x05,0x22,0x0c]

	image_sample_c_d_cl_o v[253:255], v[248:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_2D a16 tfe			image_sample_c_d_cl_o v[253:255], v[248:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_2D a16 tfe
	// GFX11: [0x04,0x03,0x29,0xf1,0xf8,0xfd,0x22,0x0c]			// GFX11: [0x04,0x03,0x29,0xf1,0xf8,0xfd,0x22,0x0c]

	image_sample_c_d_cl_o v5, v[1:16], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_CUBE d16			image_sample_c_d_cl_o v5, v[1:10], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_CUBE d16
	// GFX11: [0x0c,0x03,0x2a,0xf1,0x01,0x05,0x02,0x0c]			// GFX11: [0x0c,0x03,0x2a,0xf1,0x01,0x05,0x02,0x0c]

	image_sample_c_d_cl_o v255, v[240:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_CUBE d16			image_sample_c_d_cl_o v255, v[240:249], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_CUBE d16
	// GFX11: [0x0c,0x03,0x2a,0xf1,0xf0,0xff,0x02,0x0c]			// GFX11: [0x0c,0x03,0x2a,0xf1,0xf0,0xff,0x02,0x0c]

	image_sample_c_d_cl_o v[5:6], v[1:8], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_CUBE a16 tfe d16			image_sample_c_d_cl_o v[5:6], v[1:8], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_CUBE a16 tfe d16
	// GFX11: [0x0c,0x03,0x2b,0xf1,0x01,0x05,0x22,0x0c]			// GFX11: [0x0c,0x03,0x2b,0xf1,0x01,0x05,0x22,0x0c]

	image_sample_c_d_cl_o v[254:255], v[248:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_CUBE a16 tfe d16			image_sample_c_d_cl_o v[254:255], v[248:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_CUBE a16 tfe d16
	// GFX11: [0x0c,0x03,0x2b,0xf1,0xf8,0xfe,0x22,0x0c]			// GFX11: [0x0c,0x03,0x2b,0xf1,0xf8,0xfe,0x22,0x0c]

	image_sample_c_d_cl_o v5, v[1:7], s[8:15], s[12:15] dmask:0x4 dim:SQ_RSRC_IMG_1D_ARRAY			image_sample_c_d_cl_o v5, v[1:7], s[8:15], s[12:15] dmask:0x4 dim:SQ_RSRC_IMG_1D_ARRAY
	// GFX11: [0x10,0x04,0x28,0xf1,0x01,0x05,0x02,0x0c]			// GFX11: [0x10,0x04,0x28,0xf1,0x01,0x05,0x02,0x0c]

	image_sample_c_d_cl_o v255, v[249:255], s[8:15], s[12:15] dmask:0x4 dim:SQ_RSRC_IMG_1D_ARRAY			image_sample_c_d_cl_o v255, v[249:255], s[8:15], s[12:15] dmask:0x4 dim:SQ_RSRC_IMG_1D_ARRAY
	// GFX11: [0x10,0x04,0x28,0xf1,0xf9,0xff,0x02,0x0c]			// GFX11: [0x10,0x04,0x28,0xf1,0xf9,0xff,0x02,0x0c]

	image_sample_c_d_cl_o v[5:6], v[1:6], s[8:15], s[12:15] dmask:0x4 dim:SQ_RSRC_IMG_1D_ARRAY a16 tfe			image_sample_c_d_cl_o v[5:6], v[1:6], s[8:15], s[12:15] dmask:0x4 dim:SQ_RSRC_IMG_1D_ARRAY a16 tfe
	// GFX11: [0x10,0x04,0x29,0xf1,0x01,0x05,0x22,0x0c]			// GFX11: [0x10,0x04,0x29,0xf1,0x01,0x05,0x22,0x0c]

	image_sample_c_d_cl_o v[254:255], v[250:255], s[8:15], s[12:15] dmask:0x4 dim:SQ_RSRC_IMG_1D_ARRAY a16 tfe			image_sample_c_d_cl_o v[254:255], v[250:255], s[8:15], s[12:15] dmask:0x4 dim:SQ_RSRC_IMG_1D_ARRAY a16 tfe
	// GFX11: [0x10,0x04,0x29,0xf1,0xfa,0xfe,0x22,0x0c]			// GFX11: [0x10,0x04,0x29,0xf1,0xfa,0xfe,0x22,0x0c]

	image_sample_c_d_cl_o v5, v[1:16], s[8:15], s[12:15] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY d16			image_sample_c_d_cl_o v5, v[1:10], s[8:15], s[12:15] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY d16
	// GFX11: [0x14,0x04,0x2a,0xf1,0x01,0x05,0x02,0x0c]			// GFX11: [0x14,0x04,0x2a,0xf1,0x01,0x05,0x02,0x0c]

	image_sample_c_d_cl_o v255, v[240:255], s[8:15], s[12:15] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY d16			image_sample_c_d_cl_o v255, v[240:249], s[8:15], s[12:15] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY d16
	// GFX11: [0x14,0x04,0x2a,0xf1,0xf0,0xff,0x02,0x0c]			// GFX11: [0x14,0x04,0x2a,0xf1,0xf0,0xff,0x02,0x0c]

	image_sample_c_d_cl_o v[5:6], v[1:8], s[96:103], s[100:103] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY a16 tfe d16			image_sample_c_d_cl_o v[5:6], v[1:8], s[96:103], s[100:103] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY a16 tfe d16
	// GFX11: [0x14,0x04,0x2b,0xf1,0x01,0x05,0x38,0x64]			// GFX11: [0x14,0x04,0x2b,0xf1,0x01,0x05,0x38,0x64]

	image_sample_c_d_cl_o v[254:255], v[248:255], ttmp[8:15], ttmp[12:15] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY unorm glc slc dlc a16 tfe lwe d16			image_sample_c_d_cl_o v[254:255], v[248:255], ttmp[8:15], ttmp[12:15] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY unorm glc slc dlc a16 tfe lwe d16
	// GFX11: [0x94,0x74,0x2b,0xf1,0xf8,0xfe,0x7d,0x78]			// GFX11: [0x94,0x74,0x2b,0xf1,0xf8,0xfe,0x7d,0x78]

	image_sample_c_d_cl_o_g16 v[5:6], v[1:6], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_1D			image_sample_c_d_cl_o_g16 v[5:6], v[1:6], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_1D
	// GFX11: [0x00,0x03,0x58,0xf1,0x01,0x05,0x02,0x0c]			// GFX11: [0x00,0x03,0x58,0xf1,0x01,0x05,0x02,0x0c]

	image_sample_c_d_cl_o_g16 v[5:6], v[250:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_1D			image_sample_c_d_cl_o_g16 v[5:6], v[250:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_1D
	// GFX11: [0x00,0x03,0x58,0xf1,0xfa,0x05,0x02,0x0c]			// GFX11: [0x00,0x03,0x58,0xf1,0xfa,0x05,0x02,0x0c]

	image_sample_c_d_cl_o_g16 v[5:6], v[1:5], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_1D a16			image_sample_c_d_cl_o_g16 v[5:6], v[1:5], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_1D a16
	// GFX11: [0x00,0x03,0x59,0xf1,0x01,0x05,0x02,0x0c]			// GFX11: [0x00,0x03,0x59,0xf1,0x01,0x05,0x02,0x0c]

	image_sample_c_d_cl_o_g16 v[5:6], v[251:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_1D a16			image_sample_c_d_cl_o_g16 v[5:6], v[251:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_1D a16
	// GFX11: [0x00,0x03,0x59,0xf1,0xfb,0x05,0x02,0x0c]			// GFX11: [0x00,0x03,0x59,0xf1,0xfb,0x05,0x02,0x0c]

	image_sample_c_d_cl_o_g16 v[5:6], v[1:16], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D			image_sample_c_d_cl_o_g16 v[5:6], v[1:10], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D
	// GFX11: [0x08,0x03,0x58,0xf1,0x01,0x05,0x02,0x0c]			// GFX11: [0x08,0x03,0x58,0xf1,0x01,0x05,0x02,0x0c]

	image_sample_c_d_cl_o_g16 v[5:6], v[240:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D			image_sample_c_d_cl_o_g16 v[5:6], v[240:249], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D
	// GFX11: [0x08,0x03,0x58,0xf1,0xf0,0x05,0x02,0x0c]			// GFX11: [0x08,0x03,0x58,0xf1,0xf0,0x05,0x02,0x0c]

	image_sample_c_d_cl_o_g16 v[5:6], v[1:8], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D a16			image_sample_c_d_cl_o_g16 v[5:6], v[1:8], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D a16
	// GFX11: [0x08,0x03,0x59,0xf1,0x01,0x05,0x02,0x0c]			// GFX11: [0x08,0x03,0x59,0xf1,0x01,0x05,0x02,0x0c]

	image_sample_c_d_cl_o_g16 v[5:6], v[248:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D a16			image_sample_c_d_cl_o_g16 v[5:6], v[248:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D a16
	// GFX11: [0x08,0x03,0x59,0xf1,0xf8,0x05,0x02,0x0c]			// GFX11: [0x08,0x03,0x59,0xf1,0xf8,0x05,0x02,0x0c]

	▲ Show 20 Lines • Show All 124 Lines • ▼ Show 20 Lines
	// GFX11: [0x00,0x03,0xac,0xf0,0xfb,0x05,0x02,0x0c]			// GFX11: [0x00,0x03,0xac,0xf0,0xfb,0x05,0x02,0x0c]

	image_sample_c_d_o v[5:6], v[1:5], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_1D a16			image_sample_c_d_o v[5:6], v[1:5], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_1D a16
	// GFX11: [0x00,0x03,0xad,0xf0,0x01,0x05,0x02,0x0c]			// GFX11: [0x00,0x03,0xad,0xf0,0x01,0x05,0x02,0x0c]

	image_sample_c_d_o v[5:6], v[251:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_1D a16			image_sample_c_d_o v[5:6], v[251:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_1D a16
	// GFX11: [0x00,0x03,0xad,0xf0,0xfb,0x05,0x02,0x0c]			// GFX11: [0x00,0x03,0xad,0xf0,0xfb,0x05,0x02,0x0c]

	image_sample_c_d_o v[5:6], v[1:16], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D			image_sample_c_d_o v[5:6], v[1:11], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D
	// GFX11: [0x08,0x03,0xac,0xf0,0x01,0x05,0x02,0x0c]			// GFX11: [0x08,0x03,0xac,0xf0,0x01,0x05,0x02,0x0c]

	image_sample_c_d_o v[5:6], v[240:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D			image_sample_c_d_o v[5:6], v[241:251], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D
	// GFX11: [0x08,0x03,0xac,0xf0,0xf0,0x05,0x02,0x0c]			// GFX11: [0x08,0x03,0xac,0xf0,0xf1,0x05,0x02,0x0c]

	image_sample_c_d_o v[5:6], v[1:16], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D a16			image_sample_c_d_o v[5:6], v[1:10], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D a16
	// GFX11: [0x08,0x03,0xad,0xf0,0x01,0x05,0x02,0x0c]			// GFX11: [0x08,0x03,0xad,0xf0,0x01,0x05,0x02,0x0c]

	image_sample_c_d_o v[5:6], v[240:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D a16			image_sample_c_d_o v[5:6], v[240:249], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D a16
	// GFX11: [0x08,0x03,0xad,0xf0,0xf0,0x05,0x02,0x0c]			// GFX11: [0x08,0x03,0xad,0xf0,0xf0,0x05,0x02,0x0c]

	image_sample_c_d_o v[5:6], v[1:8], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_2D			image_sample_c_d_o v[5:6], v[1:8], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_2D
	// GFX11: [0x04,0x03,0xac,0xf0,0x01,0x05,0x02,0x0c]			// GFX11: [0x04,0x03,0xac,0xf0,0x01,0x05,0x02,0x0c]

	image_sample_c_d_o v[254:255], v[248:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_2D			image_sample_c_d_o v[254:255], v[248:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_2D
	// GFX11: [0x04,0x03,0xac,0xf0,0xf8,0xfe,0x02,0x0c]			// GFX11: [0x04,0x03,0xac,0xf0,0xf8,0xfe,0x02,0x0c]

	image_sample_c_d_o v[5:7], v[1:7], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_2D a16 tfe			image_sample_c_d_o v[5:7], v[1:7], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_2D a16 tfe
	// GFX11: [0x04,0x03,0xad,0xf0,0x01,0x05,0x22,0x0c]			// GFX11: [0x04,0x03,0xad,0xf0,0x01,0x05,0x22,0x0c]

	image_sample_c_d_o v[253:255], v[249:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_2D a16 tfe			image_sample_c_d_o v[253:255], v[249:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_2D a16 tfe
	// GFX11: [0x04,0x03,0xad,0xf0,0xf9,0xfd,0x22,0x0c]			// GFX11: [0x04,0x03,0xad,0xf0,0xf9,0xfd,0x22,0x0c]

	image_sample_c_d_o v5, v[1:16], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_CUBE d16			image_sample_c_d_o v5, v[1:9], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_CUBE d16
	// GFX11: [0x0c,0x03,0xae,0xf0,0x01,0x05,0x02,0x0c]			// GFX11: [0x0c,0x03,0xae,0xf0,0x01,0x05,0x02,0x0c]

	image_sample_c_d_o v255, v[240:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_CUBE d16			image_sample_c_d_o v255, v[241:249], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_CUBE d16
	// GFX11: [0x0c,0x03,0xae,0xf0,0xf0,0xff,0x02,0x0c]			// GFX11: [0x0c,0x03,0xae,0xf0,0xf1,0xff,0x02,0x0c]

	image_sample_c_d_o v[5:6], v[1:8], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_CUBE a16 tfe d16			image_sample_c_d_o v[5:6], v[1:8], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_CUBE a16 tfe d16
	// GFX11: [0x0c,0x03,0xaf,0xf0,0x01,0x05,0x22,0x0c]			// GFX11: [0x0c,0x03,0xaf,0xf0,0x01,0x05,0x22,0x0c]

	image_sample_c_d_o v[254:255], v[248:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_CUBE a16 tfe d16			image_sample_c_d_o v[254:255], v[248:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_CUBE a16 tfe d16
	// GFX11: [0x0c,0x03,0xaf,0xf0,0xf8,0xfe,0x22,0x0c]			// GFX11: [0x0c,0x03,0xaf,0xf0,0xf8,0xfe,0x22,0x0c]

	image_sample_c_d_o v5, v[1:6], s[8:15], s[12:15] dmask:0x4 dim:SQ_RSRC_IMG_1D_ARRAY			image_sample_c_d_o v5, v[1:6], s[8:15], s[12:15] dmask:0x4 dim:SQ_RSRC_IMG_1D_ARRAY
	// GFX11: [0x10,0x04,0xac,0xf0,0x01,0x05,0x02,0x0c]			// GFX11: [0x10,0x04,0xac,0xf0,0x01,0x05,0x02,0x0c]

	image_sample_c_d_o v255, v[250:255], s[8:15], s[12:15] dmask:0x4 dim:SQ_RSRC_IMG_1D_ARRAY			image_sample_c_d_o v255, v[250:255], s[8:15], s[12:15] dmask:0x4 dim:SQ_RSRC_IMG_1D_ARRAY
	// GFX11: [0x10,0x04,0xac,0xf0,0xfa,0xff,0x02,0x0c]			// GFX11: [0x10,0x04,0xac,0xf0,0xfa,0xff,0x02,0x0c]

	image_sample_c_d_o v[5:6], v[1:5], s[8:15], s[12:15] dmask:0x4 dim:SQ_RSRC_IMG_1D_ARRAY a16 tfe			image_sample_c_d_o v[5:6], v[1:5], s[8:15], s[12:15] dmask:0x4 dim:SQ_RSRC_IMG_1D_ARRAY a16 tfe
	// GFX11: [0x10,0x04,0xad,0xf0,0x01,0x05,0x22,0x0c]			// GFX11: [0x10,0x04,0xad,0xf0,0x01,0x05,0x22,0x0c]

	image_sample_c_d_o v[254:255], v[251:255], s[8:15], s[12:15] dmask:0x4 dim:SQ_RSRC_IMG_1D_ARRAY a16 tfe			image_sample_c_d_o v[254:255], v[251:255], s[8:15], s[12:15] dmask:0x4 dim:SQ_RSRC_IMG_1D_ARRAY a16 tfe
	// GFX11: [0x10,0x04,0xad,0xf0,0xfb,0xfe,0x22,0x0c]			// GFX11: [0x10,0x04,0xad,0xf0,0xfb,0xfe,0x22,0x0c]

	image_sample_c_d_o v5, v[1:16], s[8:15], s[12:15] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY d16			image_sample_c_d_o v5, v[1:9], s[8:15], s[12:15] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY d16
	// GFX11: [0x14,0x04,0xae,0xf0,0x01,0x05,0x02,0x0c]			// GFX11: [0x14,0x04,0xae,0xf0,0x01,0x05,0x02,0x0c]

	image_sample_c_d_o v255, v[240:255], s[8:15], s[12:15] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY d16			image_sample_c_d_o v255, v[241:249], s[8:15], s[12:15] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY d16
	// GFX11: [0x14,0x04,0xae,0xf0,0xf0,0xff,0x02,0x0c]			// GFX11: [0x14,0x04,0xae,0xf0,0xf1,0xff,0x02,0x0c]

	image_sample_c_d_o v[5:6], v[1:8], s[96:103], s[100:103] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY a16 tfe d16			image_sample_c_d_o v[5:6], v[1:8], s[96:103], s[100:103] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY a16 tfe d16
	// GFX11: [0x14,0x04,0xaf,0xf0,0x01,0x05,0x38,0x64]			// GFX11: [0x14,0x04,0xaf,0xf0,0x01,0x05,0x38,0x64]

	image_sample_c_d_o v[254:255], v[248:255], ttmp[8:15], ttmp[12:15] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY unorm glc slc dlc a16 tfe lwe d16			image_sample_c_d_o v[254:255], v[248:255], ttmp[8:15], ttmp[12:15] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY unorm glc slc dlc a16 tfe lwe d16
	// GFX11: [0x94,0x74,0xaf,0xf0,0xf8,0xfe,0x7d,0x78]			// GFX11: [0x94,0x74,0xaf,0xf0,0xf8,0xfe,0x7d,0x78]

	image_sample_c_d_o_g16 v[5:6], v[1:5], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_1D			image_sample_c_d_o_g16 v[5:6], v[1:5], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_1D
	// GFX11: [0x00,0x03,0xf0,0xf0,0x01,0x05,0x02,0x0c]			// GFX11: [0x00,0x03,0xf0,0xf0,0x01,0x05,0x02,0x0c]

	image_sample_c_d_o_g16 v[5:6], v[251:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_1D			image_sample_c_d_o_g16 v[5:6], v[251:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_1D
	// GFX11: [0x00,0x03,0xf0,0xf0,0xfb,0x05,0x02,0x0c]			// GFX11: [0x00,0x03,0xf0,0xf0,0xfb,0x05,0x02,0x0c]

	image_sample_c_d_o_g16 v[5:6], v[1:5], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_1D a16			image_sample_c_d_o_g16 v[5:6], v[1:5], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_1D a16
	// GFX11: [0x00,0x03,0xf1,0xf0,0x01,0x05,0x02,0x0c]			// GFX11: [0x00,0x03,0xf1,0xf0,0x01,0x05,0x02,0x0c]

	image_sample_c_d_o_g16 v[5:6], v[251:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_1D a16			image_sample_c_d_o_g16 v[5:6], v[251:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_1D a16
	// GFX11: [0x00,0x03,0xf1,0xf0,0xfb,0x05,0x02,0x0c]			// GFX11: [0x00,0x03,0xf1,0xf0,0xfb,0x05,0x02,0x0c]

	image_sample_c_d_o_g16 v[5:6], v[1:16], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D			image_sample_c_d_o_g16 v[5:6], v[1:9], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D
	// GFX11: [0x08,0x03,0xf0,0xf0,0x01,0x05,0x02,0x0c]			// GFX11: [0x08,0x03,0xf0,0xf0,0x01,0x05,0x02,0x0c]

	image_sample_c_d_o_g16 v[5:6], v[240:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D			image_sample_c_d_o_g16 v[5:6], v[241:249], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D
	// GFX11: [0x08,0x03,0xf0,0xf0,0xf0,0x05,0x02,0x0c]			// GFX11: [0x08,0x03,0xf0,0xf0,0xf1,0x05,0x02,0x0c]

	image_sample_c_d_o_g16 v[5:6], v[1:8], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D a16			image_sample_c_d_o_g16 v[5:6], v[1:8], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D a16
	// GFX11: [0x08,0x03,0xf1,0xf0,0x01,0x05,0x02,0x0c]			// GFX11: [0x08,0x03,0xf1,0xf0,0x01,0x05,0x02,0x0c]

	image_sample_c_d_o_g16 v[5:6], v[248:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D a16			image_sample_c_d_o_g16 v[5:6], v[248:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D a16
	// GFX11: [0x08,0x03,0xf1,0xf0,0xf8,0x05,0x02,0x0c]			// GFX11: [0x08,0x03,0xf1,0xf0,0xf8,0x05,0x02,0x0c]

	image_sample_c_d_o_g16 v[5:6], v[1:6], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_2D			image_sample_c_d_o_g16 v[5:6], v[1:6], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_2D
	▲ Show 20 Lines • Show All 555 Lines • ▼ Show 20 Lines
	// GFX11: [0x00,0x03,0x70,0xf0,0xfd,0x05,0x02,0x0c]			// GFX11: [0x00,0x03,0x70,0xf0,0xfd,0x05,0x02,0x0c]

	image_sample_d v[5:6], v[1:3], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_1D a16			image_sample_d v[5:6], v[1:3], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_1D a16
	// GFX11: [0x00,0x03,0x71,0xf0,0x01,0x05,0x02,0x0c]			// GFX11: [0x00,0x03,0x71,0xf0,0x01,0x05,0x02,0x0c]

	image_sample_d v[5:6], v[253:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_1D a16			image_sample_d v[5:6], v[253:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_1D a16
	// GFX11: [0x00,0x03,0x71,0xf0,0xfd,0x05,0x02,0x0c]			// GFX11: [0x00,0x03,0x71,0xf0,0xfd,0x05,0x02,0x0c]

	image_sample_d v[5:6], v[1:16], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D			image_sample_d v[5:6], v[1:9], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D
	// GFX11: [0x08,0x03,0x70,0xf0,0x01,0x05,0x02,0x0c]			// GFX11: [0x08,0x03,0x70,0xf0,0x01,0x05,0x02,0x0c]

	image_sample_d v[5:6], v[240:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D			image_sample_d v[5:6], v[241:249], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D
	// GFX11: [0x08,0x03,0x70,0xf0,0xf0,0x05,0x02,0x0c]			// GFX11: [0x08,0x03,0x70,0xf0,0xf1,0x05,0x02,0x0c]

	image_sample_d v[5:6], v[1:8], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D a16			image_sample_d v[5:6], v[1:8], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D a16
	// GFX11: [0x08,0x03,0x71,0xf0,0x01,0x05,0x02,0x0c]			// GFX11: [0x08,0x03,0x71,0xf0,0x01,0x05,0x02,0x0c]

	image_sample_d v[5:6], v[248:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D a16			image_sample_d v[5:6], v[248:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D a16
	// GFX11: [0x08,0x03,0x71,0xf0,0xf8,0x05,0x02,0x0c]			// GFX11: [0x08,0x03,0x71,0xf0,0xf8,0x05,0x02,0x0c]

	image_sample_d v[5:6], v[1:6], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_2D			image_sample_d v[5:6], v[1:6], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_2D
	▲ Show 20 Lines • Show All 51 Lines • ▼ Show 20 Lines
	// GFX11: [0x00,0x03,0x04,0xf1,0xfc,0x05,0x02,0x0c]			// GFX11: [0x00,0x03,0x04,0xf1,0xfc,0x05,0x02,0x0c]

	image_sample_d_cl v[5:6], v[1:3], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_1D a16			image_sample_d_cl v[5:6], v[1:3], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_1D a16
	// GFX11: [0x00,0x03,0x05,0xf1,0x01,0x05,0x02,0x0c]			// GFX11: [0x00,0x03,0x05,0xf1,0x01,0x05,0x02,0x0c]

	image_sample_d_cl v[5:6], v[253:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_1D a16			image_sample_d_cl v[5:6], v[253:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_1D a16
	// GFX11: [0x00,0x03,0x05,0xf1,0xfd,0x05,0x02,0x0c]			// GFX11: [0x00,0x03,0x05,0xf1,0xfd,0x05,0x02,0x0c]

	image_sample_d_cl v[5:6], v[1:16], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D			image_sample_d_cl v[5:6], v[1:10], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D
	// GFX11: [0x08,0x03,0x04,0xf1,0x01,0x05,0x02,0x0c]			// GFX11: [0x08,0x03,0x04,0xf1,0x01,0x05,0x02,0x0c]

	image_sample_d_cl v[5:6], v[240:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D			image_sample_d_cl v[5:6], v[240:249], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D
	// GFX11: [0x08,0x03,0x04,0xf1,0xf0,0x05,0x02,0x0c]			// GFX11: [0x08,0x03,0x04,0xf1,0xf0,0x05,0x02,0x0c]

	image_sample_d_cl v[5:6], v[1:8], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D a16			image_sample_d_cl v[5:6], v[1:8], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D a16
	// GFX11: [0x08,0x03,0x05,0xf1,0x01,0x05,0x02,0x0c]			// GFX11: [0x08,0x03,0x05,0xf1,0x01,0x05,0x02,0x0c]

	image_sample_d_cl v[5:6], v[248:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D a16			image_sample_d_cl v[5:6], v[248:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D a16
	// GFX11: [0x08,0x03,0x05,0xf1,0xf8,0x05,0x02,0x0c]			// GFX11: [0x08,0x03,0x05,0xf1,0xf8,0x05,0x02,0x0c]

	▲ Show 20 Lines • Show All 124 Lines • ▼ Show 20 Lines
	// GFX11: [0x00,0x03,0x1c,0xf1,0xfb,0x05,0x02,0x0c]			// GFX11: [0x00,0x03,0x1c,0xf1,0xfb,0x05,0x02,0x0c]

	image_sample_d_cl_o v[5:6], v[1:4], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_1D a16			image_sample_d_cl_o v[5:6], v[1:4], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_1D a16
	// GFX11: [0x00,0x03,0x1d,0xf1,0x01,0x05,0x02,0x0c]			// GFX11: [0x00,0x03,0x1d,0xf1,0x01,0x05,0x02,0x0c]

	image_sample_d_cl_o v[5:6], v[252:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_1D a16			image_sample_d_cl_o v[5:6], v[252:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_1D a16
	// GFX11: [0x00,0x03,0x1d,0xf1,0xfc,0x05,0x02,0x0c]			// GFX11: [0x00,0x03,0x1d,0xf1,0xfc,0x05,0x02,0x0c]

	image_sample_d_cl_o v[5:6], v[1:16], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D			image_sample_d_cl_o v[5:6], v[1:11], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D
	// GFX11: [0x08,0x03,0x1c,0xf1,0x01,0x05,0x02,0x0c]			// GFX11: [0x08,0x03,0x1c,0xf1,0x01,0x05,0x02,0x0c]

	image_sample_d_cl_o v[5:6], v[240:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D			image_sample_d_cl_o v[5:6], v[241:251], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D
	// GFX11: [0x08,0x03,0x1c,0xf1,0xf0,0x05,0x02,0x0c]			// GFX11: [0x08,0x03,0x1c,0xf1,0xf1,0x05,0x02,0x0c]

	image_sample_d_cl_o v[5:6], v[1:16], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D a16			image_sample_d_cl_o v[5:6], v[1:9], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D a16
	// GFX11: [0x08,0x03,0x1d,0xf1,0x01,0x05,0x02,0x0c]			// GFX11: [0x08,0x03,0x1d,0xf1,0x01,0x05,0x02,0x0c]

	image_sample_d_cl_o v[5:6], v[240:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D a16			image_sample_d_cl_o v[5:6], v[241:249], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D a16
	// GFX11: [0x08,0x03,0x1d,0xf1,0xf0,0x05,0x02,0x0c]			// GFX11: [0x08,0x03,0x1d,0xf1,0xf1,0x05,0x02,0x0c]

	image_sample_d_cl_o v[5:6], v[1:8], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_2D			image_sample_d_cl_o v[5:6], v[1:8], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_2D
	// GFX11: [0x04,0x03,0x1c,0xf1,0x01,0x05,0x02,0x0c]			// GFX11: [0x04,0x03,0x1c,0xf1,0x01,0x05,0x02,0x0c]

	image_sample_d_cl_o v[254:255], v[248:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_2D			image_sample_d_cl_o v[254:255], v[248:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_2D
	// GFX11: [0x04,0x03,0x1c,0xf1,0xf8,0xfe,0x02,0x0c]			// GFX11: [0x04,0x03,0x1c,0xf1,0xf8,0xfe,0x02,0x0c]

	image_sample_d_cl_o v[5:7], v[1:7], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_2D a16 tfe			image_sample_d_cl_o v[5:7], v[1:7], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_2D a16 tfe
	// GFX11: [0x04,0x03,0x1d,0xf1,0x01,0x05,0x22,0x0c]			// GFX11: [0x04,0x03,0x1d,0xf1,0x01,0x05,0x22,0x0c]

	image_sample_d_cl_o v[253:255], v[249:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_2D a16 tfe			image_sample_d_cl_o v[253:255], v[249:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_2D a16 tfe
	// GFX11: [0x04,0x03,0x1d,0xf1,0xf9,0xfd,0x22,0x0c]			// GFX11: [0x04,0x03,0x1d,0xf1,0xf9,0xfd,0x22,0x0c]

	image_sample_d_cl_o v5, v[1:16], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_CUBE d16			image_sample_d_cl_o v5, v[1:9], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_CUBE d16
	// GFX11: [0x0c,0x03,0x1e,0xf1,0x01,0x05,0x02,0x0c]			// GFX11: [0x0c,0x03,0x1e,0xf1,0x01,0x05,0x02,0x0c]

	image_sample_d_cl_o v255, v[240:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_CUBE d16			image_sample_d_cl_o v255, v[241:249], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_CUBE d16
	// GFX11: [0x0c,0x03,0x1e,0xf1,0xf0,0xff,0x02,0x0c]			// GFX11: [0x0c,0x03,0x1e,0xf1,0xf1,0xff,0x02,0x0c]

	image_sample_d_cl_o v[5:6], v[1:7], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_CUBE a16 tfe d16			image_sample_d_cl_o v[5:6], v[1:7], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_CUBE a16 tfe d16
	// GFX11: [0x0c,0x03,0x1f,0xf1,0x01,0x05,0x22,0x0c]			// GFX11: [0x0c,0x03,0x1f,0xf1,0x01,0x05,0x22,0x0c]

	image_sample_d_cl_o v[254:255], v[249:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_CUBE a16 tfe d16			image_sample_d_cl_o v[254:255], v[249:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_CUBE a16 tfe d16
	// GFX11: [0x0c,0x03,0x1f,0xf1,0xf9,0xfe,0x22,0x0c]			// GFX11: [0x0c,0x03,0x1f,0xf1,0xf9,0xfe,0x22,0x0c]

	image_sample_d_cl_o v5, v[1:6], s[8:15], s[12:15] dmask:0x4 dim:SQ_RSRC_IMG_1D_ARRAY			image_sample_d_cl_o v5, v[1:6], s[8:15], s[12:15] dmask:0x4 dim:SQ_RSRC_IMG_1D_ARRAY
	// GFX11: [0x10,0x04,0x1c,0xf1,0x01,0x05,0x02,0x0c]			// GFX11: [0x10,0x04,0x1c,0xf1,0x01,0x05,0x02,0x0c]

	image_sample_d_cl_o v255, v[250:255], s[8:15], s[12:15] dmask:0x4 dim:SQ_RSRC_IMG_1D_ARRAY			image_sample_d_cl_o v255, v[250:255], s[8:15], s[12:15] dmask:0x4 dim:SQ_RSRC_IMG_1D_ARRAY
	// GFX11: [0x10,0x04,0x1c,0xf1,0xfa,0xff,0x02,0x0c]			// GFX11: [0x10,0x04,0x1c,0xf1,0xfa,0xff,0x02,0x0c]

	image_sample_d_cl_o v[5:6], v[1:5], s[8:15], s[12:15] dmask:0x4 dim:SQ_RSRC_IMG_1D_ARRAY a16 tfe			image_sample_d_cl_o v[5:6], v[1:5], s[8:15], s[12:15] dmask:0x4 dim:SQ_RSRC_IMG_1D_ARRAY a16 tfe
	// GFX11: [0x10,0x04,0x1d,0xf1,0x01,0x05,0x22,0x0c]			// GFX11: [0x10,0x04,0x1d,0xf1,0x01,0x05,0x22,0x0c]

	image_sample_d_cl_o v[254:255], v[251:255], s[8:15], s[12:15] dmask:0x4 dim:SQ_RSRC_IMG_1D_ARRAY a16 tfe			image_sample_d_cl_o v[254:255], v[251:255], s[8:15], s[12:15] dmask:0x4 dim:SQ_RSRC_IMG_1D_ARRAY a16 tfe
	// GFX11: [0x10,0x04,0x1d,0xf1,0xfb,0xfe,0x22,0x0c]			// GFX11: [0x10,0x04,0x1d,0xf1,0xfb,0xfe,0x22,0x0c]

	image_sample_d_cl_o v5, v[1:16], s[8:15], s[12:15] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY d16			image_sample_d_cl_o v5, v[1:9], s[8:15], s[12:15] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY d16
	// GFX11: [0x14,0x04,0x1e,0xf1,0x01,0x05,0x02,0x0c]			// GFX11: [0x14,0x04,0x1e,0xf1,0x01,0x05,0x02,0x0c]

	image_sample_d_cl_o v255, v[240:255], s[8:15], s[12:15] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY d16			image_sample_d_cl_o v255, v[241:249], s[8:15], s[12:15] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY d16
	// GFX11: [0x14,0x04,0x1e,0xf1,0xf0,0xff,0x02,0x0c]			// GFX11: [0x14,0x04,0x1e,0xf1,0xf1,0xff,0x02,0x0c]

	image_sample_d_cl_o v[5:6], v[1:7], s[96:103], s[100:103] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY a16 tfe d16			image_sample_d_cl_o v[5:6], v[1:7], s[96:103], s[100:103] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY a16 tfe d16
	// GFX11: [0x14,0x04,0x1f,0xf1,0x01,0x05,0x38,0x64]			// GFX11: [0x14,0x04,0x1f,0xf1,0x01,0x05,0x38,0x64]

	image_sample_d_cl_o v[254:255], v[249:255], ttmp[8:15], ttmp[12:15] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY unorm glc slc dlc a16 tfe lwe d16			image_sample_d_cl_o v[254:255], v[249:255], ttmp[8:15], ttmp[12:15] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY unorm glc slc dlc a16 tfe lwe d16
	// GFX11: [0x94,0x74,0x1f,0xf1,0xf9,0xfe,0x7d,0x78]			// GFX11: [0x94,0x74,0x1f,0xf1,0xf9,0xfe,0x7d,0x78]

	image_sample_d_cl_o_g16 v[5:6], v[1:5], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_1D			image_sample_d_cl_o_g16 v[5:6], v[1:5], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_1D
	// GFX11: [0x00,0x03,0x54,0xf1,0x01,0x05,0x02,0x0c]			// GFX11: [0x00,0x03,0x54,0xf1,0x01,0x05,0x02,0x0c]

	image_sample_d_cl_o_g16 v[5:6], v[251:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_1D			image_sample_d_cl_o_g16 v[5:6], v[251:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_1D
	// GFX11: [0x00,0x03,0x54,0xf1,0xfb,0x05,0x02,0x0c]			// GFX11: [0x00,0x03,0x54,0xf1,0xfb,0x05,0x02,0x0c]

	image_sample_d_cl_o_g16 v[5:6], v[1:4], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_1D a16			image_sample_d_cl_o_g16 v[5:6], v[1:4], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_1D a16
	// GFX11: [0x00,0x03,0x55,0xf1,0x01,0x05,0x02,0x0c]			// GFX11: [0x00,0x03,0x55,0xf1,0x01,0x05,0x02,0x0c]

	image_sample_d_cl_o_g16 v[5:6], v[252:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_1D a16			image_sample_d_cl_o_g16 v[5:6], v[252:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_1D a16
	// GFX11: [0x00,0x03,0x55,0xf1,0xfc,0x05,0x02,0x0c]			// GFX11: [0x00,0x03,0x55,0xf1,0xfc,0x05,0x02,0x0c]

	image_sample_d_cl_o_g16 v[5:6], v[1:16], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D			image_sample_d_cl_o_g16 v[5:6], v[1:9], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D
	// GFX11: [0x08,0x03,0x54,0xf1,0x01,0x05,0x02,0x0c]			// GFX11: [0x08,0x03,0x54,0xf1,0x01,0x05,0x02,0x0c]

	image_sample_d_cl_o_g16 v[5:6], v[240:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D			image_sample_d_cl_o_g16 v[5:6], v[241:249], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D
	// GFX11: [0x08,0x03,0x54,0xf1,0xf0,0x05,0x02,0x0c]			// GFX11: [0x08,0x03,0x54,0xf1,0xf1,0x05,0x02,0x0c]

	image_sample_d_cl_o_g16 v[5:6], v[1:7], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D a16			image_sample_d_cl_o_g16 v[5:6], v[1:7], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D a16
	// GFX11: [0x08,0x03,0x55,0xf1,0x01,0x05,0x02,0x0c]			// GFX11: [0x08,0x03,0x55,0xf1,0x01,0x05,0x02,0x0c]

	image_sample_d_cl_o_g16 v[5:6], v[249:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D a16			image_sample_d_cl_o_g16 v[5:6], v[249:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D a16
	// GFX11: [0x08,0x03,0x55,0xf1,0xf9,0x05,0x02,0x0c]			// GFX11: [0x08,0x03,0x55,0xf1,0xf9,0x05,0x02,0x0c]

	image_sample_d_cl_o_g16 v[5:6], v[1:6], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_2D			image_sample_d_cl_o_g16 v[5:6], v[1:6], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_2D
	▲ Show 20 Lines • Show All 123 Lines • ▼ Show 20 Lines
	// GFX11: [0x00,0x03,0x98,0xf0,0xfc,0x05,0x02,0x0c]			// GFX11: [0x00,0x03,0x98,0xf0,0xfc,0x05,0x02,0x0c]

	image_sample_d_o v[5:6], v[1:4], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_1D a16			image_sample_d_o v[5:6], v[1:4], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_1D a16
	// GFX11: [0x00,0x03,0x99,0xf0,0x01,0x05,0x02,0x0c]			// GFX11: [0x00,0x03,0x99,0xf0,0x01,0x05,0x02,0x0c]

	image_sample_d_o v[5:6], v[252:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_1D a16			image_sample_d_o v[5:6], v[252:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_1D a16
	// GFX11: [0x00,0x03,0x99,0xf0,0xfc,0x05,0x02,0x0c]			// GFX11: [0x00,0x03,0x99,0xf0,0xfc,0x05,0x02,0x0c]

	image_sample_d_o v[5:6], v[1:16], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D			image_sample_d_o v[5:6], v[1:10], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D
	// GFX11: [0x08,0x03,0x98,0xf0,0x01,0x05,0x02,0x0c]			// GFX11: [0x08,0x03,0x98,0xf0,0x01,0x05,0x02,0x0c]

	image_sample_d_o v[5:6], v[240:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D			image_sample_d_o v[5:6], v[240:249], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D
	// GFX11: [0x08,0x03,0x98,0xf0,0xf0,0x05,0x02,0x0c]			// GFX11: [0x08,0x03,0x98,0xf0,0xf0,0x05,0x02,0x0c]

	image_sample_d_o v[5:6], v[1:16], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D a16			image_sample_d_o v[5:6], v[1:9], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D a16
	// GFX11: [0x08,0x03,0x99,0xf0,0x01,0x05,0x02,0x0c]			// GFX11: [0x08,0x03,0x99,0xf0,0x01,0x05,0x02,0x0c]

	image_sample_d_o v[5:6], v[240:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D a16			image_sample_d_o v[5:6], v[241:249], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D a16
	// GFX11: [0x08,0x03,0x99,0xf0,0xf0,0x05,0x02,0x0c]			// GFX11: [0x08,0x03,0x99,0xf0,0xf1,0x05,0x02,0x0c]

	image_sample_d_o v[5:6], v[1:7], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_2D			image_sample_d_o v[5:6], v[1:7], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_2D
	// GFX11: [0x04,0x03,0x98,0xf0,0x01,0x05,0x02,0x0c]			// GFX11: [0x04,0x03,0x98,0xf0,0x01,0x05,0x02,0x0c]

	image_sample_d_o v[254:255], v[249:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_2D			image_sample_d_o v[254:255], v[249:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_2D
	// GFX11: [0x04,0x03,0x98,0xf0,0xf9,0xfe,0x02,0x0c]			// GFX11: [0x04,0x03,0x98,0xf0,0xf9,0xfe,0x02,0x0c]

	image_sample_d_o v[5:7], v[1:6], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_2D a16 tfe			image_sample_d_o v[5:7], v[1:6], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_2D a16 tfe
	▲ Show 20 Lines • Show All 808 Lines • Show Last 20 Lines

llvm/test/MC/AMDGPU/gfx11_asm_mimg_features.s

	Show First 20 Lines • Show All 157 Lines • ▼ Show 20 Lines
	// GFX11: image_sample_cl v[64:66], [v32, v16, v20, v21], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_2D_ARRAY ; encoding: [0x15,0x07,0x00,0xf1,0x20,0x40,0x01,0x64,0x10,0x14,0x15,0x00]			// GFX11: image_sample_cl v[64:66], [v32, v16, v20, v21], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_2D_ARRAY ; encoding: [0x15,0x07,0x00,0xf1,0x20,0x40,0x01,0x64,0x10,0x14,0x15,0x00]

	image_sample_d v[64:66], [v32, v16, v8], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_1D			image_sample_d v[64:66], [v32, v16, v8], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_1D
	// GFX11: image_sample_d v[64:66], [v32, v16, v8], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_1D ; encoding: [0x01,0x07,0x70,0xf0,0x20,0x40,0x01,0x64,0x10,0x08,0x00,0x00]			// GFX11: image_sample_d v[64:66], [v32, v16, v8], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_1D ; encoding: [0x01,0x07,0x70,0xf0,0x20,0x40,0x01,0x64,0x10,0x08,0x00,0x00]

	image_sample_d v[64:66], v[32:39], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_2D			image_sample_d v[64:66], v[32:39], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_2D
	// GFX11: image_sample_d v[64:66], v[32:39], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_2D ; encoding: [0x04,0x07,0x70,0xf0,0x20,0x40,0x01,0x64]			// GFX11: image_sample_d v[64:66], v[32:39], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_2D ; encoding: [0x04,0x07,0x70,0xf0,0x20,0x40,0x01,0x64]

	image_sample_d v[64:66], v[32:47], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_3D			image_sample_d v[64:66], v[32:40], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_3D
	// GFX11: image_sample_d v[64:66], v[32:47], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_3D ; encoding: [0x08,0x07,0x70,0xf0,0x20,0x40,0x01,0x64]			// GFX11: image_sample_d v[64:66], v[32:40], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_3D ; encoding: [0x08,0x07,0x70,0xf0,0x20,0x40,0x01,0x64]

	image_sample_d v[64:66], [v32, v16, v8, v4], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_1D_ARRAY			image_sample_d v[64:66], [v32, v16, v8, v4], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_1D_ARRAY
	// GFX11: image_sample_d v[64:66], [v32, v16, v8, v4], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_1D_ARRAY ; encoding: [0x11,0x07,0x70,0xf0,0x20,0x40,0x01,0x64,0x10,0x08,0x04,0x00]			// GFX11: image_sample_d v[64:66], [v32, v16, v8, v4], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_1D_ARRAY ; encoding: [0x11,0x07,0x70,0xf0,0x20,0x40,0x01,0x64,0x10,0x08,0x04,0x00]

	image_sample_l v[64:66], [v32, v16, v8], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_1D_ARRAY			image_sample_l v[64:66], [v32, v16, v8], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_1D_ARRAY
	// GFX11: image_sample_l v[64:66], [v32, v16, v8], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_1D_ARRAY ; encoding: [0x11,0x07,0x74,0xf0,0x20,0x40,0x01,0x64,0x10,0x08,0x00,0x00]			// GFX11: image_sample_l v[64:66], [v32, v16, v8], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_1D_ARRAY ; encoding: [0x11,0x07,0x74,0xf0,0x20,0x40,0x01,0x64,0x10,0x08,0x00,0x00]

	image_sample_b v[64:66], [v32, v16, v8], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_1D_ARRAY			image_sample_b v[64:66], [v32, v16, v8], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_1D_ARRAY
	▲ Show 20 Lines • Show All 105 Lines • ▼ Show 20 Lines
	// GFX11: image_msaa_load v[1:4], v[5:8], s[8:15] dmask:0x2 dim:SQ_RSRC_IMG_2D_MSAA_ARRAY ; encoding: [0x1c,0x02,0x60,0xf0,0x05,0x01,0x02,0x00]			// GFX11: image_msaa_load v[1:4], v[5:8], s[8:15] dmask:0x2 dim:SQ_RSRC_IMG_2D_MSAA_ARRAY ; encoding: [0x1c,0x02,0x60,0xf0,0x05,0x01,0x02,0x00]

	image_msaa_load v[1:2], v[5:8], s[8:15] dmask:0x2 dim:SQ_RSRC_IMG_2D_MSAA_ARRAY d16			image_msaa_load v[1:2], v[5:8], s[8:15] dmask:0x2 dim:SQ_RSRC_IMG_2D_MSAA_ARRAY d16
	// GFX11: image_msaa_load v[1:2], v[5:8], s[8:15] dmask:0x2 dim:SQ_RSRC_IMG_2D_MSAA_ARRAY d16 ; encoding: [0x1c,0x02,0x62,0xf0,0x05,0x01,0x02,0x00]			// GFX11: image_msaa_load v[1:2], v[5:8], s[8:15] dmask:0x2 dim:SQ_RSRC_IMG_2D_MSAA_ARRAY d16 ; encoding: [0x1c,0x02,0x62,0xf0,0x05,0x01,0x02,0x00]

	image_msaa_load v[10:13], [v204, v11, v14, v19], s[40:47] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA_ARRAY			image_msaa_load v[10:13], [v204, v11, v14, v19], s[40:47] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA_ARRAY
	// GFX11: image_msaa_load v[10:13], [v204, v11, v14, v19], s[40:47] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA_ARRAY ; encoding: [0x1d,0x01,0x60,0xf0,0xcc,0x0a,0x0a,0x00,0x0b,0x0e,0x13,0x00]			// GFX11: image_msaa_load v[10:13], [v204, v11, v14, v19], s[40:47] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA_ARRAY ; encoding: [0x1d,0x01,0x60,0xf0,0xcc,0x0a,0x0a,0x00,0x0b,0x0e,0x13,0x00]

	image_bvh_intersect_ray v[4:7], v[9:24], s[4:7]			image_bvh_intersect_ray v[4:7], v[9:19], s[4:7]
	// GFX11: image_bvh_intersect_ray v[4:7], v[9:24], s[4:7] ; encoding: [0x80,0x8f,0x64,0xf0,0x09,0x04,0x01,0x00]			// GFX11: image_bvh_intersect_ray v[4:7], v[9:19], s[4:7] ; encoding: [0x80,0x8f,0x64,0xf0,0x09,0x04,0x01,0x00]

	image_bvh_intersect_ray v[4:7], v[9:16], s[4:7] a16			image_bvh_intersect_ray v[4:7], v[9:16], s[4:7] a16
	// GFX11: image_bvh_intersect_ray v[4:7], v[9:16], s[4:7] a16 ; encoding: [0x80,0x8f,0x65,0xf0,0x09,0x04,0x01,0x00]			// GFX11: image_bvh_intersect_ray v[4:7], v[9:16], s[4:7] a16 ; encoding: [0x80,0x8f,0x65,0xf0,0x09,0x04,0x01,0x00]

	image_bvh64_intersect_ray v[4:7], v[9:24], s[4:7]			image_bvh64_intersect_ray v[4:7], v[9:20], s[4:7]
	// GFX11: image_bvh64_intersect_ray v[4:7], v[9:24], s[4:7] ; encoding: [0x80,0x8f,0x68,0xf0,0x09,0x04,0x01,0x00]			// GFX11: image_bvh64_intersect_ray v[4:7], v[9:20], s[4:7] ; encoding: [0x80,0x8f,0x68,0xf0,0x09,0x04,0x01,0x00]

	image_bvh64_intersect_ray v[4:7], v[9:24], s[4:7] a16			image_bvh64_intersect_ray v[4:7], v[9:17], s[4:7] a16
	// GFX11: image_bvh64_intersect_ray v[4:7], v[9:24], s[4:7] a16 ; encoding: [0x80,0x8f,0x69,0xf0,0x09,0x04,0x01,0x00]			// GFX11: image_bvh64_intersect_ray v[4:7], v[9:17], s[4:7] a16 ; encoding: [0x80,0x8f,0x69,0xf0,0x09,0x04,0x01,0x00]

	image_bvh_intersect_ray v[39:42], [v50, v46, v[20:22], v[40:42], v[47:49]], s[12:15]			image_bvh_intersect_ray v[39:42], [v50, v46, v[20:22], v[40:42], v[47:49]], s[12:15]
	// GFX11: image_bvh_intersect_ray v[39:42], [v50, v46, v[20:22], v[40:42], v[47:49]], s[12:15] ; encoding: [0x81,0x8f,0x64,0xf0,0x32,0x27,0x03,0x00,0x2e,0x14,0x28,0x2f]			// GFX11: image_bvh_intersect_ray v[39:42], [v50, v46, v[20:22], v[40:42], v[47:49]], s[12:15] ; encoding: [0x81,0x8f,0x64,0xf0,0x32,0x27,0x03,0x00,0x2e,0x14,0x28,0x2f]

	image_bvh_intersect_ray v[39:42], [v50, v46, v[20:22], v[40:42]], s[12:15] a16			image_bvh_intersect_ray v[39:42], [v50, v46, v[20:22], v[40:42]], s[12:15] a16
	// GFX11: image_bvh_intersect_ray v[39:42], [v50, v46, v[20:22], v[40:42]], s[12:15] a16 ; encoding: [0x81,0x8f,0x65,0xf0,0x32,0x27,0x03,0x00,0x2e,0x14,0x28,0x00]			// GFX11: image_bvh_intersect_ray v[39:42], [v50, v46, v[20:22], v[40:42]], s[12:15] a16 ; encoding: [0x81,0x8f,0x65,0xf0,0x32,0x27,0x03,0x00,0x2e,0x14,0x28,0x00]

	image_bvh64_intersect_ray v[39:42], [v[50:51], v46, v[20:22], v[40:42], v[47:49]], s[12:15]			image_bvh64_intersect_ray v[39:42], [v[50:51], v46, v[20:22], v[40:42], v[47:49]], s[12:15]
	// GFX11: image_bvh64_intersect_ray v[39:42], [v[50:51], v46, v[20:22], v[40:42], v[47:49]], s[12:15] ; encoding: [0x81,0x8f,0x68,0xf0,0x32,0x27,0x03,0x00,0x2e,0x14,0x28,0x2f]			// GFX11: image_bvh64_intersect_ray v[39:42], [v[50:51], v46, v[20:22], v[40:42], v[47:49]], s[12:15] ; encoding: [0x81,0x8f,0x68,0xf0,0x32,0x27,0x03,0x00,0x2e,0x14,0x28,0x2f]

	image_bvh64_intersect_ray v[39:42], [v[50:51], v46, v[20:22], v[40:42]], s[12:15] a16			image_bvh64_intersect_ray v[39:42], [v[50:51], v46, v[20:22], v[40:42]], s[12:15] a16
	// GFX11: image_bvh64_intersect_ray v[39:42], [v[50:51], v46, v[20:22], v[40:42]], s[12:15] a16 ; encoding: [0x81,0x8f,0x69,0xf0,0x32,0x27,0x03,0x00,0x2e,0x14,0x28,0x00]			// GFX11: image_bvh64_intersect_ray v[39:42], [v[50:51], v46, v[20:22], v[40:42]], s[12:15] a16 ; encoding: [0x81,0x8f,0x69,0xf0,0x32,0x27,0x03,0x00,0x2e,0x14,0x28,0x00]

llvm/test/MC/AMDGPU/gfx7_asm_mimg.s

	Show First 20 Lines • Show All 1,842 Lines • ▼ Show 20 Lines
	// CHECK: [0x00,0x0f,0x88,0xf0,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x0f,0x88,0xf0,0x01,0x05,0x62,0x00]

	image_sample_d v5, v[1:3], s[8:15], s[12:15] dmask:0x0			image_sample_d v5, v[1:3], s[8:15], s[12:15] dmask:0x0
	// CHECK: [0x00,0x00,0x88,0xf0,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x00,0x88,0xf0,0x01,0x05,0x62,0x00]

	image_sample_d v5, v[1:8], s[8:15], s[12:15] dmask:0x1			image_sample_d v5, v[1:8], s[8:15], s[12:15] dmask:0x1
	// CHECK: [0x00,0x01,0x88,0xf0,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x01,0x88,0xf0,0x01,0x05,0x62,0x00]

	image_sample_d v5, v[1:16], s[8:15], s[12:15] dmask:0x1			image_sample_d v5, v[1:9], s[8:15], s[12:15] dmask:0x1
	// CHECK: [0x00,0x01,0x88,0xf0,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x01,0x88,0xf0,0x01,0x05,0x62,0x00]

	image_sample_d v5, v[1:4], s[8:15], s[12:15] dmask:0x1			image_sample_d v5, v[1:4], s[8:15], s[12:15] dmask:0x1
	// CHECK: [0x00,0x01,0x88,0xf0,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x01,0x88,0xf0,0x01,0x05,0x62,0x00]

	image_sample_d v5, v[1:3], s[8:15], s[12:15] dmask:0x1 unorm			image_sample_d v5, v[1:3], s[8:15], s[12:15] dmask:0x1 unorm
	// CHECK: [0x00,0x11,0x88,0xf0,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x11,0x88,0xf0,0x01,0x05,0x62,0x00]

	▲ Show 20 Lines • Show All 82 Lines • ▼ Show 20 Lines
	// CHECK: [0x00,0x0f,0x8c,0xf0,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x0f,0x8c,0xf0,0x01,0x05,0x62,0x00]

	image_sample_d_cl v5, v[1:4], s[8:15], s[12:15] dmask:0x0			image_sample_d_cl v5, v[1:4], s[8:15], s[12:15] dmask:0x0
	// CHECK: [0x00,0x00,0x8c,0xf0,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x00,0x8c,0xf0,0x01,0x05,0x62,0x00]

	image_sample_d_cl v5, v[1:8], s[8:15], s[12:15] dmask:0x1			image_sample_d_cl v5, v[1:8], s[8:15], s[12:15] dmask:0x1
	// CHECK: [0x00,0x01,0x8c,0xf0,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x01,0x8c,0xf0,0x01,0x05,0x62,0x00]

	image_sample_d_cl v5, v[1:16], s[8:15], s[12:15] dmask:0x1			image_sample_d_cl v5, v[1:10], s[8:15], s[12:15] dmask:0x1
	// CHECK: [0x00,0x01,0x8c,0xf0,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x01,0x8c,0xf0,0x01,0x05,0x62,0x00]

	image_sample_d_cl v5, v[1:4], s[8:15], s[12:15] dmask:0x1 unorm			image_sample_d_cl v5, v[1:4], s[8:15], s[12:15] dmask:0x1 unorm
	// CHECK: [0x00,0x11,0x8c,0xf0,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x11,0x8c,0xf0,0x01,0x05,0x62,0x00]

	image_sample_d_cl v5, v[1:4], s[8:15], s[12:15] dmask:0x1 glc			image_sample_d_cl v5, v[1:4], s[8:15], s[12:15] dmask:0x1 glc
	// CHECK: [0x00,0x21,0x8c,0xf0,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x21,0x8c,0xf0,0x01,0x05,0x62,0x00]

	▲ Show 20 Lines • Show All 655 Lines • ▼ Show 20 Lines
	// CHECK: [0x00,0x0f,0xa8,0xf0,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x0f,0xa8,0xf0,0x01,0x05,0x62,0x00]

	image_sample_c_d v5, v[1:4], s[8:15], s[12:15] dmask:0x0			image_sample_c_d v5, v[1:4], s[8:15], s[12:15] dmask:0x0
	// CHECK: [0x00,0x00,0xa8,0xf0,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x00,0xa8,0xf0,0x01,0x05,0x62,0x00]

	image_sample_c_d v5, v[1:8], s[8:15], s[12:15] dmask:0x1			image_sample_c_d v5, v[1:8], s[8:15], s[12:15] dmask:0x1
	// CHECK: [0x00,0x01,0xa8,0xf0,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x01,0xa8,0xf0,0x01,0x05,0x62,0x00]

	image_sample_c_d v5, v[1:16], s[8:15], s[12:15] dmask:0x1			image_sample_c_d v5, v[1:10], s[8:15], s[12:15] dmask:0x1
	// CHECK: [0x00,0x01,0xa8,0xf0,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x01,0xa8,0xf0,0x01,0x05,0x62,0x00]

	image_sample_c_d v5, v[1:4], s[8:15], s[12:15] dmask:0x1 unorm			image_sample_c_d v5, v[1:4], s[8:15], s[12:15] dmask:0x1 unorm
	// CHECK: [0x00,0x11,0xa8,0xf0,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x11,0xa8,0xf0,0x01,0x05,0x62,0x00]

	image_sample_c_d v5, v[1:4], s[8:15], s[12:15] dmask:0x1 glc			image_sample_c_d v5, v[1:4], s[8:15], s[12:15] dmask:0x1 glc
	// CHECK: [0x00,0x21,0xa8,0xf0,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x21,0xa8,0xf0,0x01,0x05,0x62,0x00]

	▲ Show 20 Lines • Show All 76 Lines • ▼ Show 20 Lines
	// CHECK: [0x00,0x0e,0xac,0xf0,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x0e,0xac,0xf0,0x01,0x05,0x62,0x00]

	image_sample_c_d_cl v[5:8], v[1:8], s[8:15], s[12:15] dmask:0xf			image_sample_c_d_cl v[5:8], v[1:8], s[8:15], s[12:15] dmask:0xf
	// CHECK: [0x00,0x0f,0xac,0xf0,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x0f,0xac,0xf0,0x01,0x05,0x62,0x00]

	image_sample_c_d_cl v5, v[1:8], s[8:15], s[12:15] dmask:0x0			image_sample_c_d_cl v5, v[1:8], s[8:15], s[12:15] dmask:0x0
	// CHECK: [0x00,0x00,0xac,0xf0,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x00,0xac,0xf0,0x01,0x05,0x62,0x00]

	image_sample_c_d_cl v5, v[1:16], s[8:15], s[12:15] dmask:0x1			image_sample_c_d_cl v5, v[1:10], s[8:15], s[12:15] dmask:0x1
	// CHECK: [0x00,0x01,0xac,0xf0,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x01,0xac,0xf0,0x01,0x05,0x62,0x00]

	image_sample_c_d_cl v5, v[1:8], s[8:15], s[12:15] dmask:0x1 unorm			image_sample_c_d_cl v5, v[1:8], s[8:15], s[12:15] dmask:0x1 unorm
	// CHECK: [0x00,0x11,0xac,0xf0,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x11,0xac,0xf0,0x01,0x05,0x62,0x00]

	image_sample_c_d_cl v5, v[1:8], s[8:15], s[12:15] dmask:0x1 glc			image_sample_c_d_cl v5, v[1:8], s[8:15], s[12:15] dmask:0x1 glc
	// CHECK: [0x00,0x21,0xac,0xf0,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x21,0xac,0xf0,0x01,0x05,0x62,0x00]

	▲ Show 20 Lines • Show All 652 Lines • ▼ Show 20 Lines
	// CHECK: [0x00,0x0f,0xc8,0xf0,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x0f,0xc8,0xf0,0x01,0x05,0x62,0x00]

	image_sample_d_o v5, v[1:4], s[8:15], s[12:15] dmask:0x0			image_sample_d_o v5, v[1:4], s[8:15], s[12:15] dmask:0x0
	// CHECK: [0x00,0x00,0xc8,0xf0,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x00,0xc8,0xf0,0x01,0x05,0x62,0x00]

	image_sample_d_o v5, v[1:8], s[8:15], s[12:15] dmask:0x1			image_sample_d_o v5, v[1:8], s[8:15], s[12:15] dmask:0x1
	// CHECK: [0x00,0x01,0xc8,0xf0,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x01,0xc8,0xf0,0x01,0x05,0x62,0x00]

	image_sample_d_o v5, v[1:16], s[8:15], s[12:15] dmask:0x1			image_sample_d_o v5, v[1:10], s[8:15], s[12:15] dmask:0x1
	// CHECK: [0x00,0x01,0xc8,0xf0,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x01,0xc8,0xf0,0x01,0x05,0x62,0x00]

	image_sample_d_o v5, v[1:4], s[8:15], s[12:15] dmask:0x1 unorm			image_sample_d_o v5, v[1:4], s[8:15], s[12:15] dmask:0x1 unorm
	// CHECK: [0x00,0x11,0xc8,0xf0,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x11,0xc8,0xf0,0x01,0x05,0x62,0x00]

	image_sample_d_o v5, v[1:4], s[8:15], s[12:15] dmask:0x1 glc			image_sample_d_o v5, v[1:4], s[8:15], s[12:15] dmask:0x1 glc
	// CHECK: [0x00,0x21,0xc8,0xf0,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x21,0xc8,0xf0,0x01,0x05,0x62,0x00]

	▲ Show 20 Lines • Show All 76 Lines • ▼ Show 20 Lines
	// CHECK: [0x00,0x0e,0xcc,0xf0,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x0e,0xcc,0xf0,0x01,0x05,0x62,0x00]

	image_sample_d_cl_o v[5:8], v[1:8], s[8:15], s[12:15] dmask:0xf			image_sample_d_cl_o v[5:8], v[1:8], s[8:15], s[12:15] dmask:0xf
	// CHECK: [0x00,0x0f,0xcc,0xf0,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x0f,0xcc,0xf0,0x01,0x05,0x62,0x00]

	image_sample_d_cl_o v5, v[1:8], s[8:15], s[12:15] dmask:0x0			image_sample_d_cl_o v5, v[1:8], s[8:15], s[12:15] dmask:0x0
	// CHECK: [0x00,0x00,0xcc,0xf0,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x00,0xcc,0xf0,0x01,0x05,0x62,0x00]

	image_sample_d_cl_o v5, v[1:16], s[8:15], s[12:15] dmask:0x1			image_sample_d_cl_o v5, v[1:10], s[8:15], s[12:15] dmask:0x1
	// CHECK: [0x00,0x01,0xcc,0xf0,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x01,0xcc,0xf0,0x01,0x05,0x62,0x00]

	image_sample_d_cl_o v5, v[1:8], s[8:15], s[12:15] dmask:0x1 unorm			image_sample_d_cl_o v5, v[1:8], s[8:15], s[12:15] dmask:0x1 unorm
	// CHECK: [0x00,0x11,0xcc,0xf0,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x11,0xcc,0xf0,0x01,0x05,0x62,0x00]

	image_sample_d_cl_o v5, v[1:8], s[8:15], s[12:15] dmask:0x1 glc			image_sample_d_cl_o v5, v[1:8], s[8:15], s[12:15] dmask:0x1 glc
	// CHECK: [0x00,0x21,0xcc,0xf0,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x21,0xcc,0xf0,0x01,0x05,0x62,0x00]

	▲ Show 20 Lines • Show All 646 Lines • ▼ Show 20 Lines
	// CHECK: [0x00,0x0e,0xe8,0xf0,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x0e,0xe8,0xf0,0x01,0x05,0x62,0x00]

	image_sample_c_d_o v[5:8], v[1:8], s[8:15], s[12:15] dmask:0xf			image_sample_c_d_o v[5:8], v[1:8], s[8:15], s[12:15] dmask:0xf
	// CHECK: [0x00,0x0f,0xe8,0xf0,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x0f,0xe8,0xf0,0x01,0x05,0x62,0x00]

	image_sample_c_d_o v5, v[1:8], s[8:15], s[12:15] dmask:0x0			image_sample_c_d_o v5, v[1:8], s[8:15], s[12:15] dmask:0x0
	// CHECK: [0x00,0x00,0xe8,0xf0,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x00,0xe8,0xf0,0x01,0x05,0x62,0x00]

	image_sample_c_d_o v5, v[1:16], s[8:15], s[12:15] dmask:0x1			image_sample_c_d_o v5, v[1:10], s[8:15], s[12:15] dmask:0x1
	// CHECK: [0x00,0x01,0xe8,0xf0,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x01,0xe8,0xf0,0x01,0x05,0x62,0x00]

	image_sample_c_d_o v5, v[1:8], s[8:15], s[12:15] dmask:0x1 unorm			image_sample_c_d_o v5, v[1:8], s[8:15], s[12:15] dmask:0x1 unorm
	// CHECK: [0x00,0x11,0xe8,0xf0,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x11,0xe8,0xf0,0x01,0x05,0x62,0x00]

	image_sample_c_d_o v5, v[1:8], s[8:15], s[12:15] dmask:0x1 glc			image_sample_c_d_o v5, v[1:8], s[8:15], s[12:15] dmask:0x1 glc
	// CHECK: [0x00,0x21,0xe8,0xf0,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x21,0xe8,0xf0,0x01,0x05,0x62,0x00]

	▲ Show 20 Lines • Show All 76 Lines • ▼ Show 20 Lines
	// CHECK: [0x00,0x0e,0xec,0xf0,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x0e,0xec,0xf0,0x01,0x05,0x62,0x00]

	image_sample_c_d_cl_o v[5:8], v[1:8], s[8:15], s[12:15] dmask:0xf			image_sample_c_d_cl_o v[5:8], v[1:8], s[8:15], s[12:15] dmask:0xf
	// CHECK: [0x00,0x0f,0xec,0xf0,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x0f,0xec,0xf0,0x01,0x05,0x62,0x00]

	image_sample_c_d_cl_o v5, v[1:8], s[8:15], s[12:15] dmask:0x0			image_sample_c_d_cl_o v5, v[1:8], s[8:15], s[12:15] dmask:0x0
	// CHECK: [0x00,0x00,0xec,0xf0,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x00,0xec,0xf0,0x01,0x05,0x62,0x00]

	image_sample_c_d_cl_o v5, v[1:16], s[8:15], s[12:15] dmask:0x1			image_sample_c_d_cl_o v5, v[1:10], s[8:15], s[12:15] dmask:0x1
	// CHECK: [0x00,0x01,0xec,0xf0,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x01,0xec,0xf0,0x01,0x05,0x62,0x00]

	image_sample_c_d_cl_o v5, v[1:8], s[8:15], s[12:15] dmask:0x1 unorm			image_sample_c_d_cl_o v5, v[1:8], s[8:15], s[12:15] dmask:0x1 unorm
	// CHECK: [0x00,0x11,0xec,0xf0,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x11,0xec,0xf0,0x01,0x05,0x62,0x00]

	image_sample_c_d_cl_o v5, v[1:8], s[8:15], s[12:15] dmask:0x1 glc			image_sample_c_d_cl_o v5, v[1:8], s[8:15], s[12:15] dmask:0x1 glc
	// CHECK: [0x00,0x21,0xec,0xf0,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x21,0xec,0xf0,0x01,0x05,0x62,0x00]

	▲ Show 20 Lines • Show All 1,822 Lines • ▼ Show 20 Lines
	// CHECK: [0x00,0x0f,0xa0,0xf1,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x0f,0xa0,0xf1,0x01,0x05,0x62,0x00]

	image_sample_cd v5, v[1:3], s[8:15], s[12:15] dmask:0x0			image_sample_cd v5, v[1:3], s[8:15], s[12:15] dmask:0x0
	// CHECK: [0x00,0x00,0xa0,0xf1,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x00,0xa0,0xf1,0x01,0x05,0x62,0x00]

	image_sample_cd v5, v[1:8], s[8:15], s[12:15] dmask:0x1			image_sample_cd v5, v[1:8], s[8:15], s[12:15] dmask:0x1
	// CHECK: [0x00,0x01,0xa0,0xf1,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x01,0xa0,0xf1,0x01,0x05,0x62,0x00]

	image_sample_cd v5, v[1:16], s[8:15], s[12:15] dmask:0x1			image_sample_cd v5, v[1:9], s[8:15], s[12:15] dmask:0x1
	// CHECK: [0x00,0x01,0xa0,0xf1,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x01,0xa0,0xf1,0x01,0x05,0x62,0x00]

	image_sample_cd v5, v[1:4], s[8:15], s[12:15] dmask:0x1			image_sample_cd v5, v[1:4], s[8:15], s[12:15] dmask:0x1
	// CHECK: [0x00,0x01,0xa0,0xf1,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x01,0xa0,0xf1,0x01,0x05,0x62,0x00]

	image_sample_cd v5, v[1:3], s[8:15], s[12:15] dmask:0x1 unorm			image_sample_cd v5, v[1:3], s[8:15], s[12:15] dmask:0x1 unorm
	// CHECK: [0x00,0x11,0xa0,0xf1,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x11,0xa0,0xf1,0x01,0x05,0x62,0x00]

	▲ Show 20 Lines • Show All 82 Lines • ▼ Show 20 Lines
	// CHECK: [0x00,0x0f,0xa4,0xf1,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x0f,0xa4,0xf1,0x01,0x05,0x62,0x00]

	image_sample_cd_cl v5, v[1:4], s[8:15], s[12:15] dmask:0x0			image_sample_cd_cl v5, v[1:4], s[8:15], s[12:15] dmask:0x0
	// CHECK: [0x00,0x00,0xa4,0xf1,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x00,0xa4,0xf1,0x01,0x05,0x62,0x00]

	image_sample_cd_cl v5, v[1:8], s[8:15], s[12:15] dmask:0x1			image_sample_cd_cl v5, v[1:8], s[8:15], s[12:15] dmask:0x1
	// CHECK: [0x00,0x01,0xa4,0xf1,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x01,0xa4,0xf1,0x01,0x05,0x62,0x00]

	image_sample_cd_cl v5, v[1:16], s[8:15], s[12:15] dmask:0x1			image_sample_cd_cl v5, v[1:10], s[8:15], s[12:15] dmask:0x1
	// CHECK: [0x00,0x01,0xa4,0xf1,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x01,0xa4,0xf1,0x01,0x05,0x62,0x00]

	image_sample_cd_cl v5, v[1:4], s[8:15], s[12:15] dmask:0x1 unorm			image_sample_cd_cl v5, v[1:4], s[8:15], s[12:15] dmask:0x1 unorm
	// CHECK: [0x00,0x11,0xa4,0xf1,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x11,0xa4,0xf1,0x01,0x05,0x62,0x00]

	image_sample_cd_cl v5, v[1:4], s[8:15], s[12:15] dmask:0x1 glc			image_sample_cd_cl v5, v[1:4], s[8:15], s[12:15] dmask:0x1 glc
	// CHECK: [0x00,0x21,0xa4,0xf1,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x21,0xa4,0xf1,0x01,0x05,0x62,0x00]

	▲ Show 20 Lines • Show All 79 Lines • ▼ Show 20 Lines
	// CHECK: [0x00,0x0f,0xa8,0xf1,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x0f,0xa8,0xf1,0x01,0x05,0x62,0x00]

	image_sample_c_cd v5, v[1:4], s[8:15], s[12:15] dmask:0x0			image_sample_c_cd v5, v[1:4], s[8:15], s[12:15] dmask:0x0
	// CHECK: [0x00,0x00,0xa8,0xf1,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x00,0xa8,0xf1,0x01,0x05,0x62,0x00]

	image_sample_c_cd v5, v[1:8], s[8:15], s[12:15] dmask:0x1			image_sample_c_cd v5, v[1:8], s[8:15], s[12:15] dmask:0x1
	// CHECK: [0x00,0x01,0xa8,0xf1,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x01,0xa8,0xf1,0x01,0x05,0x62,0x00]

	image_sample_c_cd v5, v[1:16], s[8:15], s[12:15] dmask:0x1			image_sample_c_cd v5, v[1:10], s[8:15], s[12:15] dmask:0x1
	// CHECK: [0x00,0x01,0xa8,0xf1,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x01,0xa8,0xf1,0x01,0x05,0x62,0x00]

	image_sample_c_cd v5, v[1:4], s[8:15], s[12:15] dmask:0x1 unorm			image_sample_c_cd v5, v[1:4], s[8:15], s[12:15] dmask:0x1 unorm
	// CHECK: [0x00,0x11,0xa8,0xf1,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x11,0xa8,0xf1,0x01,0x05,0x62,0x00]

	image_sample_c_cd v5, v[1:4], s[8:15], s[12:15] dmask:0x1 glc			image_sample_c_cd v5, v[1:4], s[8:15], s[12:15] dmask:0x1 glc
	// CHECK: [0x00,0x21,0xa8,0xf1,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x21,0xa8,0xf1,0x01,0x05,0x62,0x00]

	▲ Show 20 Lines • Show All 76 Lines • ▼ Show 20 Lines
	// CHECK: [0x00,0x0e,0xac,0xf1,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x0e,0xac,0xf1,0x01,0x05,0x62,0x00]

	image_sample_c_cd_cl v[5:8], v[1:8], s[8:15], s[12:15] dmask:0xf			image_sample_c_cd_cl v[5:8], v[1:8], s[8:15], s[12:15] dmask:0xf
	// CHECK: [0x00,0x0f,0xac,0xf1,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x0f,0xac,0xf1,0x01,0x05,0x62,0x00]

	image_sample_c_cd_cl v5, v[1:8], s[8:15], s[12:15] dmask:0x0			image_sample_c_cd_cl v5, v[1:8], s[8:15], s[12:15] dmask:0x0
	// CHECK: [0x00,0x00,0xac,0xf1,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x00,0xac,0xf1,0x01,0x05,0x62,0x00]

	image_sample_c_cd_cl v5, v[1:16], s[8:15], s[12:15] dmask:0x1			image_sample_c_cd_cl v5, v[1:10], s[8:15], s[12:15] dmask:0x1
	// CHECK: [0x00,0x01,0xac,0xf1,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x01,0xac,0xf1,0x01,0x05,0x62,0x00]

	image_sample_c_cd_cl v5, v[1:8], s[8:15], s[12:15] dmask:0x1 unorm			image_sample_c_cd_cl v5, v[1:8], s[8:15], s[12:15] dmask:0x1 unorm
	// CHECK: [0x00,0x11,0xac,0xf1,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x11,0xac,0xf1,0x01,0x05,0x62,0x00]

	image_sample_c_cd_cl v5, v[1:8], s[8:15], s[12:15] dmask:0x1 glc			image_sample_c_cd_cl v5, v[1:8], s[8:15], s[12:15] dmask:0x1 glc
	// CHECK: [0x00,0x21,0xac,0xf1,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x21,0xac,0xf1,0x01,0x05,0x62,0x00]

	▲ Show 20 Lines • Show All 79 Lines • ▼ Show 20 Lines
	// CHECK: [0x00,0x0f,0xb0,0xf1,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x0f,0xb0,0xf1,0x01,0x05,0x62,0x00]

	image_sample_cd_o v5, v[1:4], s[8:15], s[12:15] dmask:0x0			image_sample_cd_o v5, v[1:4], s[8:15], s[12:15] dmask:0x0
	// CHECK: [0x00,0x00,0xb0,0xf1,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x00,0xb0,0xf1,0x01,0x05,0x62,0x00]

	image_sample_cd_o v5, v[1:8], s[8:15], s[12:15] dmask:0x1			image_sample_cd_o v5, v[1:8], s[8:15], s[12:15] dmask:0x1
	// CHECK: [0x00,0x01,0xb0,0xf1,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x01,0xb0,0xf1,0x01,0x05,0x62,0x00]

	image_sample_cd_o v5, v[1:16], s[8:15], s[12:15] dmask:0x1			image_sample_cd_o v5, v[1:10], s[8:15], s[12:15] dmask:0x1
	// CHECK: [0x00,0x01,0xb0,0xf1,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x01,0xb0,0xf1,0x01,0x05,0x62,0x00]

	image_sample_cd_o v5, v[1:4], s[8:15], s[12:15] dmask:0x1 unorm			image_sample_cd_o v5, v[1:4], s[8:15], s[12:15] dmask:0x1 unorm
	// CHECK: [0x00,0x11,0xb0,0xf1,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x11,0xb0,0xf1,0x01,0x05,0x62,0x00]

	image_sample_cd_o v5, v[1:4], s[8:15], s[12:15] dmask:0x1 glc			image_sample_cd_o v5, v[1:4], s[8:15], s[12:15] dmask:0x1 glc
	// CHECK: [0x00,0x21,0xb0,0xf1,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x21,0xb0,0xf1,0x01,0x05,0x62,0x00]

	▲ Show 20 Lines • Show All 76 Lines • ▼ Show 20 Lines
	// CHECK: [0x00,0x0e,0xb4,0xf1,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x0e,0xb4,0xf1,0x01,0x05,0x62,0x00]

	image_sample_cd_cl_o v[5:8], v[1:8], s[8:15], s[12:15] dmask:0xf			image_sample_cd_cl_o v[5:8], v[1:8], s[8:15], s[12:15] dmask:0xf
	// CHECK: [0x00,0x0f,0xb4,0xf1,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x0f,0xb4,0xf1,0x01,0x05,0x62,0x00]

	image_sample_cd_cl_o v5, v[1:8], s[8:15], s[12:15] dmask:0x0			image_sample_cd_cl_o v5, v[1:8], s[8:15], s[12:15] dmask:0x0
	// CHECK: [0x00,0x00,0xb4,0xf1,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x00,0xb4,0xf1,0x01,0x05,0x62,0x00]

	image_sample_cd_cl_o v5, v[1:16], s[8:15], s[12:15] dmask:0x1			image_sample_cd_cl_o v5, v[1:10], s[8:15], s[12:15] dmask:0x1
	// CHECK: [0x00,0x01,0xb4,0xf1,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x01,0xb4,0xf1,0x01,0x05,0x62,0x00]

	image_sample_cd_cl_o v5, v[1:8], s[8:15], s[12:15] dmask:0x1 unorm			image_sample_cd_cl_o v5, v[1:8], s[8:15], s[12:15] dmask:0x1 unorm
	// CHECK: [0x00,0x11,0xb4,0xf1,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x11,0xb4,0xf1,0x01,0x05,0x62,0x00]

	image_sample_cd_cl_o v5, v[1:8], s[8:15], s[12:15] dmask:0x1 glc			image_sample_cd_cl_o v5, v[1:8], s[8:15], s[12:15] dmask:0x1 glc
	// CHECK: [0x00,0x21,0xb4,0xf1,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x21,0xb4,0xf1,0x01,0x05,0x62,0x00]

	▲ Show 20 Lines • Show All 76 Lines • ▼ Show 20 Lines
	// CHECK: [0x00,0x0e,0xb8,0xf1,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x0e,0xb8,0xf1,0x01,0x05,0x62,0x00]

	image_sample_c_cd_o v[5:8], v[1:8], s[8:15], s[12:15] dmask:0xf			image_sample_c_cd_o v[5:8], v[1:8], s[8:15], s[12:15] dmask:0xf
	// CHECK: [0x00,0x0f,0xb8,0xf1,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x0f,0xb8,0xf1,0x01,0x05,0x62,0x00]

	image_sample_c_cd_o v5, v[1:8], s[8:15], s[12:15] dmask:0x0			image_sample_c_cd_o v5, v[1:8], s[8:15], s[12:15] dmask:0x0
	// CHECK: [0x00,0x00,0xb8,0xf1,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x00,0xb8,0xf1,0x01,0x05,0x62,0x00]

	image_sample_c_cd_o v5, v[1:16], s[8:15], s[12:15] dmask:0x1			image_sample_c_cd_o v5, v[1:10], s[8:15], s[12:15] dmask:0x1
	// CHECK: [0x00,0x01,0xb8,0xf1,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x01,0xb8,0xf1,0x01,0x05,0x62,0x00]

	image_sample_c_cd_o v5, v[1:8], s[8:15], s[12:15] dmask:0x1 unorm			image_sample_c_cd_o v5, v[1:8], s[8:15], s[12:15] dmask:0x1 unorm
	// CHECK: [0x00,0x11,0xb8,0xf1,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x11,0xb8,0xf1,0x01,0x05,0x62,0x00]

	image_sample_c_cd_o v5, v[1:8], s[8:15], s[12:15] dmask:0x1 glc			image_sample_c_cd_o v5, v[1:8], s[8:15], s[12:15] dmask:0x1 glc
	// CHECK: [0x00,0x21,0xb8,0xf1,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x21,0xb8,0xf1,0x01,0x05,0x62,0x00]

	▲ Show 20 Lines • Show All 76 Lines • ▼ Show 20 Lines
	// CHECK: [0x00,0x0e,0xbc,0xf1,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x0e,0xbc,0xf1,0x01,0x05,0x62,0x00]

	image_sample_c_cd_cl_o v[5:8], v[1:8], s[8:15], s[12:15] dmask:0xf			image_sample_c_cd_cl_o v[5:8], v[1:8], s[8:15], s[12:15] dmask:0xf
	// CHECK: [0x00,0x0f,0xbc,0xf1,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x0f,0xbc,0xf1,0x01,0x05,0x62,0x00]

	image_sample_c_cd_cl_o v5, v[1:8], s[8:15], s[12:15] dmask:0x0			image_sample_c_cd_cl_o v5, v[1:8], s[8:15], s[12:15] dmask:0x0
	// CHECK: [0x00,0x00,0xbc,0xf1,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x00,0xbc,0xf1,0x01,0x05,0x62,0x00]

	image_sample_c_cd_cl_o v5, v[1:16], s[8:15], s[12:15] dmask:0x1			image_sample_c_cd_cl_o v5, v[1:10], s[8:15], s[12:15] dmask:0x1
	// CHECK: [0x00,0x01,0xbc,0xf1,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x01,0xbc,0xf1,0x01,0x05,0x62,0x00]

	image_sample_c_cd_cl_o v5, v[1:8], s[8:15], s[12:15] dmask:0x1 unorm			image_sample_c_cd_cl_o v5, v[1:8], s[8:15], s[12:15] dmask:0x1 unorm
	// CHECK: [0x00,0x11,0xbc,0xf1,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x11,0xbc,0xf1,0x01,0x05,0x62,0x00]

	image_sample_c_cd_cl_o v5, v[1:8], s[8:15], s[12:15] dmask:0x1 glc			image_sample_c_cd_cl_o v5, v[1:8], s[8:15], s[12:15] dmask:0x1 glc
	// CHECK: [0x00,0x21,0xbc,0xf1,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x21,0xbc,0xf1,0x01,0x05,0x62,0x00]

	Show All 11 Lines

llvm/test/MC/AMDGPU/gfx8_asm_mimg.s

	Show First 20 Lines • Show All 1,767 Lines • ▼ Show 20 Lines
	// CHECK: [0x00,0x0f,0x88,0xf0,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x0f,0x88,0xf0,0x01,0x05,0x62,0x00]

	image_sample_d v5, v[1:3], s[8:15], s[12:15] dmask:0x0			image_sample_d v5, v[1:3], s[8:15], s[12:15] dmask:0x0
	// CHECK: [0x00,0x00,0x88,0xf0,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x00,0x88,0xf0,0x01,0x05,0x62,0x00]

	image_sample_d v5, v[1:8], s[8:15], s[12:15] dmask:0x1			image_sample_d v5, v[1:8], s[8:15], s[12:15] dmask:0x1
	// CHECK: [0x00,0x01,0x88,0xf0,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x01,0x88,0xf0,0x01,0x05,0x62,0x00]

	image_sample_d v5, v[1:16], s[8:15], s[12:15] dmask:0x1			image_sample_d v5, v[1:9], s[8:15], s[12:15] dmask:0x1
	// CHECK: [0x00,0x01,0x88,0xf0,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x01,0x88,0xf0,0x01,0x05,0x62,0x00]

	image_sample_d v5, v[1:4], s[8:15], s[12:15] dmask:0x1			image_sample_d v5, v[1:4], s[8:15], s[12:15] dmask:0x1
	// CHECK: [0x00,0x01,0x88,0xf0,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x01,0x88,0xf0,0x01,0x05,0x62,0x00]

	image_sample_d v5, v[1:3], s[8:15], s[12:15] dmask:0x1 unorm			image_sample_d v5, v[1:3], s[8:15], s[12:15] dmask:0x1 unorm
	// CHECK: [0x00,0x11,0x88,0xf0,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x11,0x88,0xf0,0x01,0x05,0x62,0x00]

	▲ Show 20 Lines • Show All 85 Lines • ▼ Show 20 Lines
	// CHECK: [0x00,0x0f,0x8c,0xf0,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x0f,0x8c,0xf0,0x01,0x05,0x62,0x00]

	image_sample_d_cl v5, v[1:4], s[8:15], s[12:15] dmask:0x0			image_sample_d_cl v5, v[1:4], s[8:15], s[12:15] dmask:0x0
	// CHECK: [0x00,0x00,0x8c,0xf0,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x00,0x8c,0xf0,0x01,0x05,0x62,0x00]

	image_sample_d_cl v5, v[1:8], s[8:15], s[12:15] dmask:0x1			image_sample_d_cl v5, v[1:8], s[8:15], s[12:15] dmask:0x1
	// CHECK: [0x00,0x01,0x8c,0xf0,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x01,0x8c,0xf0,0x01,0x05,0x62,0x00]

	image_sample_d_cl v5, v[1:16], s[8:15], s[12:15] dmask:0x1			image_sample_d_cl v5, v[1:10], s[8:15], s[12:15] dmask:0x1
	// CHECK: [0x00,0x01,0x8c,0xf0,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x01,0x8c,0xf0,0x01,0x05,0x62,0x00]

	image_sample_d_cl v5, v[1:4], s[8:15], s[12:15] dmask:0x1 unorm			image_sample_d_cl v5, v[1:4], s[8:15], s[12:15] dmask:0x1 unorm
	// CHECK: [0x00,0x11,0x8c,0xf0,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x11,0x8c,0xf0,0x01,0x05,0x62,0x00]

	image_sample_d_cl v5, v[1:4], s[8:15], s[12:15] dmask:0x1 glc			image_sample_d_cl v5, v[1:4], s[8:15], s[12:15] dmask:0x1 glc
	// CHECK: [0x00,0x21,0x8c,0xf0,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x21,0x8c,0xf0,0x01,0x05,0x62,0x00]

	▲ Show 20 Lines • Show All 676 Lines • ▼ Show 20 Lines
	// CHECK: [0x00,0x0f,0xa8,0xf0,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x0f,0xa8,0xf0,0x01,0x05,0x62,0x00]

	image_sample_c_d v5, v[1:4], s[8:15], s[12:15] dmask:0x0			image_sample_c_d v5, v[1:4], s[8:15], s[12:15] dmask:0x0
	// CHECK: [0x00,0x00,0xa8,0xf0,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x00,0xa8,0xf0,0x01,0x05,0x62,0x00]

	image_sample_c_d v5, v[1:8], s[8:15], s[12:15] dmask:0x1			image_sample_c_d v5, v[1:8], s[8:15], s[12:15] dmask:0x1
	// CHECK: [0x00,0x01,0xa8,0xf0,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x01,0xa8,0xf0,0x01,0x05,0x62,0x00]

	image_sample_c_d v5, v[1:16], s[8:15], s[12:15] dmask:0x1			image_sample_c_d v5, v[1:10], s[8:15], s[12:15] dmask:0x1
	// CHECK: [0x00,0x01,0xa8,0xf0,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x01,0xa8,0xf0,0x01,0x05,0x62,0x00]

	image_sample_c_d v5, v[1:4], s[8:15], s[12:15] dmask:0x1 unorm			image_sample_c_d v5, v[1:4], s[8:15], s[12:15] dmask:0x1 unorm
	// CHECK: [0x00,0x11,0xa8,0xf0,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x11,0xa8,0xf0,0x01,0x05,0x62,0x00]

	image_sample_c_d v5, v[1:4], s[8:15], s[12:15] dmask:0x1 glc			image_sample_c_d v5, v[1:4], s[8:15], s[12:15] dmask:0x1 glc
	// CHECK: [0x00,0x21,0xa8,0xf0,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x21,0xa8,0xf0,0x01,0x05,0x62,0x00]

	▲ Show 20 Lines • Show All 79 Lines • ▼ Show 20 Lines
	// CHECK: [0x00,0x0e,0xac,0xf0,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x0e,0xac,0xf0,0x01,0x05,0x62,0x00]

	image_sample_c_d_cl v[5:8], v[1:8], s[8:15], s[12:15] dmask:0xf			image_sample_c_d_cl v[5:8], v[1:8], s[8:15], s[12:15] dmask:0xf
	// CHECK: [0x00,0x0f,0xac,0xf0,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x0f,0xac,0xf0,0x01,0x05,0x62,0x00]

	image_sample_c_d_cl v5, v[1:8], s[8:15], s[12:15] dmask:0x0			image_sample_c_d_cl v5, v[1:8], s[8:15], s[12:15] dmask:0x0
	// CHECK: [0x00,0x00,0xac,0xf0,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x00,0xac,0xf0,0x01,0x05,0x62,0x00]

	image_sample_c_d_cl v5, v[1:16], s[8:15], s[12:15] dmask:0x1			image_sample_c_d_cl v5, v[1:10], s[8:15], s[12:15] dmask:0x1
	// CHECK: [0x00,0x01,0xac,0xf0,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x01,0xac,0xf0,0x01,0x05,0x62,0x00]

	image_sample_c_d_cl v5, v[1:8], s[8:15], s[12:15] dmask:0x1 unorm			image_sample_c_d_cl v5, v[1:8], s[8:15], s[12:15] dmask:0x1 unorm
	// CHECK: [0x00,0x11,0xac,0xf0,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x11,0xac,0xf0,0x01,0x05,0x62,0x00]

	image_sample_c_d_cl v5, v[1:8], s[8:15], s[12:15] dmask:0x1 glc			image_sample_c_d_cl v5, v[1:8], s[8:15], s[12:15] dmask:0x1 glc
	// CHECK: [0x00,0x21,0xac,0xf0,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x21,0xac,0xf0,0x01,0x05,0x62,0x00]

	▲ Show 20 Lines • Show All 673 Lines • ▼ Show 20 Lines
	// CHECK: [0x00,0x0f,0xc8,0xf0,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x0f,0xc8,0xf0,0x01,0x05,0x62,0x00]

	image_sample_d_o v5, v[1:4], s[8:15], s[12:15] dmask:0x0			image_sample_d_o v5, v[1:4], s[8:15], s[12:15] dmask:0x0
	// CHECK: [0x00,0x00,0xc8,0xf0,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x00,0xc8,0xf0,0x01,0x05,0x62,0x00]

	image_sample_d_o v5, v[1:8], s[8:15], s[12:15] dmask:0x1			image_sample_d_o v5, v[1:8], s[8:15], s[12:15] dmask:0x1
	// CHECK: [0x00,0x01,0xc8,0xf0,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x01,0xc8,0xf0,0x01,0x05,0x62,0x00]

	image_sample_d_o v5, v[1:16], s[8:15], s[12:15] dmask:0x1			image_sample_d_o v5, v[1:10], s[8:15], s[12:15] dmask:0x1
	// CHECK: [0x00,0x01,0xc8,0xf0,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x01,0xc8,0xf0,0x01,0x05,0x62,0x00]

	image_sample_d_o v5, v[1:4], s[8:15], s[12:15] dmask:0x1 unorm			image_sample_d_o v5, v[1:4], s[8:15], s[12:15] dmask:0x1 unorm
	// CHECK: [0x00,0x11,0xc8,0xf0,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x11,0xc8,0xf0,0x01,0x05,0x62,0x00]

	image_sample_d_o v5, v[1:4], s[8:15], s[12:15] dmask:0x1 glc			image_sample_d_o v5, v[1:4], s[8:15], s[12:15] dmask:0x1 glc
	// CHECK: [0x00,0x21,0xc8,0xf0,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x21,0xc8,0xf0,0x01,0x05,0x62,0x00]

	▲ Show 20 Lines • Show All 79 Lines • ▼ Show 20 Lines
	// CHECK: [0x00,0x0e,0xcc,0xf0,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x0e,0xcc,0xf0,0x01,0x05,0x62,0x00]

	image_sample_d_cl_o v[5:8], v[1:8], s[8:15], s[12:15] dmask:0xf			image_sample_d_cl_o v[5:8], v[1:8], s[8:15], s[12:15] dmask:0xf
	// CHECK: [0x00,0x0f,0xcc,0xf0,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x0f,0xcc,0xf0,0x01,0x05,0x62,0x00]

	image_sample_d_cl_o v5, v[1:8], s[8:15], s[12:15] dmask:0x0			image_sample_d_cl_o v5, v[1:8], s[8:15], s[12:15] dmask:0x0
	// CHECK: [0x00,0x00,0xcc,0xf0,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x00,0xcc,0xf0,0x01,0x05,0x62,0x00]

	image_sample_d_cl_o v5, v[1:16], s[8:15], s[12:15] dmask:0x1			image_sample_d_cl_o v5, v[1:10], s[8:15], s[12:15] dmask:0x1
	// CHECK: [0x00,0x01,0xcc,0xf0,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x01,0xcc,0xf0,0x01,0x05,0x62,0x00]

	image_sample_d_cl_o v5, v[1:8], s[8:15], s[12:15] dmask:0x1 unorm			image_sample_d_cl_o v5, v[1:8], s[8:15], s[12:15] dmask:0x1 unorm
	// CHECK: [0x00,0x11,0xcc,0xf0,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x11,0xcc,0xf0,0x01,0x05,0x62,0x00]

	image_sample_d_cl_o v5, v[1:8], s[8:15], s[12:15] dmask:0x1 glc			image_sample_d_cl_o v5, v[1:8], s[8:15], s[12:15] dmask:0x1 glc
	// CHECK: [0x00,0x21,0xcc,0xf0,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x21,0xcc,0xf0,0x01,0x05,0x62,0x00]

	▲ Show 20 Lines • Show All 667 Lines • ▼ Show 20 Lines
	// CHECK: [0x00,0x0e,0xe8,0xf0,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x0e,0xe8,0xf0,0x01,0x05,0x62,0x00]

	image_sample_c_d_o v[5:8], v[1:8], s[8:15], s[12:15] dmask:0xf			image_sample_c_d_o v[5:8], v[1:8], s[8:15], s[12:15] dmask:0xf
	// CHECK: [0x00,0x0f,0xe8,0xf0,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x0f,0xe8,0xf0,0x01,0x05,0x62,0x00]

	image_sample_c_d_o v5, v[1:8], s[8:15], s[12:15] dmask:0x0			image_sample_c_d_o v5, v[1:8], s[8:15], s[12:15] dmask:0x0
	// CHECK: [0x00,0x00,0xe8,0xf0,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x00,0xe8,0xf0,0x01,0x05,0x62,0x00]

	image_sample_c_d_o v5, v[1:16], s[8:15], s[12:15] dmask:0x1			image_sample_c_d_o v5, v[1:10], s[8:15], s[12:15] dmask:0x1
	// CHECK: [0x00,0x01,0xe8,0xf0,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x01,0xe8,0xf0,0x01,0x05,0x62,0x00]

	image_sample_c_d_o v5, v[1:8], s[8:15], s[12:15] dmask:0x1 unorm			image_sample_c_d_o v5, v[1:8], s[8:15], s[12:15] dmask:0x1 unorm
	// CHECK: [0x00,0x11,0xe8,0xf0,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x11,0xe8,0xf0,0x01,0x05,0x62,0x00]

	image_sample_c_d_o v5, v[1:8], s[8:15], s[12:15] dmask:0x1 glc			image_sample_c_d_o v5, v[1:8], s[8:15], s[12:15] dmask:0x1 glc
	// CHECK: [0x00,0x21,0xe8,0xf0,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x21,0xe8,0xf0,0x01,0x05,0x62,0x00]

	▲ Show 20 Lines • Show All 79 Lines • ▼ Show 20 Lines
	// CHECK: [0x00,0x0e,0xec,0xf0,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x0e,0xec,0xf0,0x01,0x05,0x62,0x00]

	image_sample_c_d_cl_o v[5:8], v[1:8], s[8:15], s[12:15] dmask:0xf			image_sample_c_d_cl_o v[5:8], v[1:8], s[8:15], s[12:15] dmask:0xf
	// CHECK: [0x00,0x0f,0xec,0xf0,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x0f,0xec,0xf0,0x01,0x05,0x62,0x00]

	image_sample_c_d_cl_o v5, v[1:8], s[8:15], s[12:15] dmask:0x0			image_sample_c_d_cl_o v5, v[1:8], s[8:15], s[12:15] dmask:0x0
	// CHECK: [0x00,0x00,0xec,0xf0,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x00,0xec,0xf0,0x01,0x05,0x62,0x00]

	image_sample_c_d_cl_o v5, v[1:16], s[8:15], s[12:15] dmask:0x1			image_sample_c_d_cl_o v5, v[1:10], s[8:15], s[12:15] dmask:0x1
	// CHECK: [0x00,0x01,0xec,0xf0,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x01,0xec,0xf0,0x01,0x05,0x62,0x00]

	image_sample_c_d_cl_o v5, v[1:8], s[8:15], s[12:15] dmask:0x1 unorm			image_sample_c_d_cl_o v5, v[1:8], s[8:15], s[12:15] dmask:0x1 unorm
	// CHECK: [0x00,0x11,0xec,0xf0,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x11,0xec,0xf0,0x01,0x05,0x62,0x00]

	image_sample_c_d_cl_o v5, v[1:8], s[8:15], s[12:15] dmask:0x1 glc			image_sample_c_d_cl_o v5, v[1:8], s[8:15], s[12:15] dmask:0x1 glc
	// CHECK: [0x00,0x21,0xec,0xf0,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x21,0xec,0xf0,0x01,0x05,0x62,0x00]

	▲ Show 20 Lines • Show All 1,909 Lines • ▼ Show 20 Lines
	// CHECK: [0x00,0x0f,0xa0,0xf1,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x0f,0xa0,0xf1,0x01,0x05,0x62,0x00]

	image_sample_cd v5, v[1:3], s[8:15], s[12:15] dmask:0x0			image_sample_cd v5, v[1:3], s[8:15], s[12:15] dmask:0x0
	// CHECK: [0x00,0x00,0xa0,0xf1,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x00,0xa0,0xf1,0x01,0x05,0x62,0x00]

	image_sample_cd v5, v[1:8], s[8:15], s[12:15] dmask:0x1			image_sample_cd v5, v[1:8], s[8:15], s[12:15] dmask:0x1
	// CHECK: [0x00,0x01,0xa0,0xf1,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x01,0xa0,0xf1,0x01,0x05,0x62,0x00]

	image_sample_cd v5, v[1:16], s[8:15], s[12:15] dmask:0x1			image_sample_cd v5, v[1:9], s[8:15], s[12:15] dmask:0x1
	// CHECK: [0x00,0x01,0xa0,0xf1,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x01,0xa0,0xf1,0x01,0x05,0x62,0x00]

	image_sample_cd v5, v[1:4], s[8:15], s[12:15] dmask:0x1			image_sample_cd v5, v[1:4], s[8:15], s[12:15] dmask:0x1
	// CHECK: [0x00,0x01,0xa0,0xf1,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x01,0xa0,0xf1,0x01,0x05,0x62,0x00]

	image_sample_cd v5, v[1:3], s[8:15], s[12:15] dmask:0x1 unorm			image_sample_cd v5, v[1:3], s[8:15], s[12:15] dmask:0x1 unorm
	// CHECK: [0x00,0x11,0xa0,0xf1,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x11,0xa0,0xf1,0x01,0x05,0x62,0x00]

	▲ Show 20 Lines • Show All 85 Lines • ▼ Show 20 Lines
	// CHECK: [0x00,0x0f,0xa4,0xf1,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x0f,0xa4,0xf1,0x01,0x05,0x62,0x00]

	image_sample_cd_cl v5, v[1:4], s[8:15], s[12:15] dmask:0x0			image_sample_cd_cl v5, v[1:4], s[8:15], s[12:15] dmask:0x0
	// CHECK: [0x00,0x00,0xa4,0xf1,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x00,0xa4,0xf1,0x01,0x05,0x62,0x00]

	image_sample_cd_cl v5, v[1:8], s[8:15], s[12:15] dmask:0x1			image_sample_cd_cl v5, v[1:8], s[8:15], s[12:15] dmask:0x1
	// CHECK: [0x00,0x01,0xa4,0xf1,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x01,0xa4,0xf1,0x01,0x05,0x62,0x00]

	image_sample_cd_cl v5, v[1:16], s[8:15], s[12:15] dmask:0x1			image_sample_cd_cl v5, v[1:10], s[8:15], s[12:15] dmask:0x1
	// CHECK: [0x00,0x01,0xa4,0xf1,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x01,0xa4,0xf1,0x01,0x05,0x62,0x00]

	image_sample_cd_cl v5, v[1:4], s[8:15], s[12:15] dmask:0x1 unorm			image_sample_cd_cl v5, v[1:4], s[8:15], s[12:15] dmask:0x1 unorm
	// CHECK: [0x00,0x11,0xa4,0xf1,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x11,0xa4,0xf1,0x01,0x05,0x62,0x00]

	image_sample_cd_cl v5, v[1:4], s[8:15], s[12:15] dmask:0x1 glc			image_sample_cd_cl v5, v[1:4], s[8:15], s[12:15] dmask:0x1 glc
	// CHECK: [0x00,0x21,0xa4,0xf1,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x21,0xa4,0xf1,0x01,0x05,0x62,0x00]

	▲ Show 20 Lines • Show All 82 Lines • ▼ Show 20 Lines
	// CHECK: [0x00,0x0f,0xa8,0xf1,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x0f,0xa8,0xf1,0x01,0x05,0x62,0x00]

	image_sample_c_cd v5, v[1:4], s[8:15], s[12:15] dmask:0x0			image_sample_c_cd v5, v[1:4], s[8:15], s[12:15] dmask:0x0
	// CHECK: [0x00,0x00,0xa8,0xf1,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x00,0xa8,0xf1,0x01,0x05,0x62,0x00]

	image_sample_c_cd v5, v[1:8], s[8:15], s[12:15] dmask:0x1			image_sample_c_cd v5, v[1:8], s[8:15], s[12:15] dmask:0x1
	// CHECK: [0x00,0x01,0xa8,0xf1,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x01,0xa8,0xf1,0x01,0x05,0x62,0x00]

	image_sample_c_cd v5, v[1:16], s[8:15], s[12:15] dmask:0x1			image_sample_c_cd v5, v[1:10], s[8:15], s[12:15] dmask:0x1
	// CHECK: [0x00,0x01,0xa8,0xf1,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x01,0xa8,0xf1,0x01,0x05,0x62,0x00]

	image_sample_c_cd v5, v[1:4], s[8:15], s[12:15] dmask:0x1 unorm			image_sample_c_cd v5, v[1:4], s[8:15], s[12:15] dmask:0x1 unorm
	// CHECK: [0x00,0x11,0xa8,0xf1,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x11,0xa8,0xf1,0x01,0x05,0x62,0x00]

	image_sample_c_cd v5, v[1:4], s[8:15], s[12:15] dmask:0x1 glc			image_sample_c_cd v5, v[1:4], s[8:15], s[12:15] dmask:0x1 glc
	// CHECK: [0x00,0x21,0xa8,0xf1,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x21,0xa8,0xf1,0x01,0x05,0x62,0x00]

	▲ Show 20 Lines • Show All 79 Lines • ▼ Show 20 Lines
	// CHECK: [0x00,0x0e,0xac,0xf1,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x0e,0xac,0xf1,0x01,0x05,0x62,0x00]

	image_sample_c_cd_cl v[5:8], v[1:8], s[8:15], s[12:15] dmask:0xf			image_sample_c_cd_cl v[5:8], v[1:8], s[8:15], s[12:15] dmask:0xf
	// CHECK: [0x00,0x0f,0xac,0xf1,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x0f,0xac,0xf1,0x01,0x05,0x62,0x00]

	image_sample_c_cd_cl v5, v[1:8], s[8:15], s[12:15] dmask:0x0			image_sample_c_cd_cl v5, v[1:8], s[8:15], s[12:15] dmask:0x0
	// CHECK: [0x00,0x00,0xac,0xf1,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x00,0xac,0xf1,0x01,0x05,0x62,0x00]

	image_sample_c_cd_cl v5, v[1:16], s[8:15], s[12:15] dmask:0x1			image_sample_c_cd_cl v5, v[1:10], s[8:15], s[12:15] dmask:0x1
	// CHECK: [0x00,0x01,0xac,0xf1,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x01,0xac,0xf1,0x01,0x05,0x62,0x00]

	image_sample_c_cd_cl v5, v[1:8], s[8:15], s[12:15] dmask:0x1 unorm			image_sample_c_cd_cl v5, v[1:8], s[8:15], s[12:15] dmask:0x1 unorm
	// CHECK: [0x00,0x11,0xac,0xf1,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x11,0xac,0xf1,0x01,0x05,0x62,0x00]

	image_sample_c_cd_cl v5, v[1:8], s[8:15], s[12:15] dmask:0x1 glc			image_sample_c_cd_cl v5, v[1:8], s[8:15], s[12:15] dmask:0x1 glc
	// CHECK: [0x00,0x21,0xac,0xf1,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x21,0xac,0xf1,0x01,0x05,0x62,0x00]

	▲ Show 20 Lines • Show All 82 Lines • ▼ Show 20 Lines
	// CHECK: [0x00,0x0f,0xb0,0xf1,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x0f,0xb0,0xf1,0x01,0x05,0x62,0x00]

	image_sample_cd_o v5, v[1:4], s[8:15], s[12:15] dmask:0x0			image_sample_cd_o v5, v[1:4], s[8:15], s[12:15] dmask:0x0
	// CHECK: [0x00,0x00,0xb0,0xf1,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x00,0xb0,0xf1,0x01,0x05,0x62,0x00]

	image_sample_cd_o v5, v[1:8], s[8:15], s[12:15] dmask:0x1			image_sample_cd_o v5, v[1:8], s[8:15], s[12:15] dmask:0x1
	// CHECK: [0x00,0x01,0xb0,0xf1,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x01,0xb0,0xf1,0x01,0x05,0x62,0x00]

	image_sample_cd_o v5, v[1:16], s[8:15], s[12:15] dmask:0x1			image_sample_cd_o v5, v[1:10], s[8:15], s[12:15] dmask:0x1
	// CHECK: [0x00,0x01,0xb0,0xf1,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x01,0xb0,0xf1,0x01,0x05,0x62,0x00]

	image_sample_cd_o v5, v[1:4], s[8:15], s[12:15] dmask:0x1 unorm			image_sample_cd_o v5, v[1:4], s[8:15], s[12:15] dmask:0x1 unorm
	// CHECK: [0x00,0x11,0xb0,0xf1,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x11,0xb0,0xf1,0x01,0x05,0x62,0x00]

	image_sample_cd_o v5, v[1:4], s[8:15], s[12:15] dmask:0x1 glc			image_sample_cd_o v5, v[1:4], s[8:15], s[12:15] dmask:0x1 glc
	// CHECK: [0x00,0x21,0xb0,0xf1,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x21,0xb0,0xf1,0x01,0x05,0x62,0x00]

	▲ Show 20 Lines • Show All 79 Lines • ▼ Show 20 Lines
	// CHECK: [0x00,0x0e,0xb4,0xf1,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x0e,0xb4,0xf1,0x01,0x05,0x62,0x00]

	image_sample_cd_cl_o v[5:8], v[1:8], s[8:15], s[12:15] dmask:0xf			image_sample_cd_cl_o v[5:8], v[1:8], s[8:15], s[12:15] dmask:0xf
	// CHECK: [0x00,0x0f,0xb4,0xf1,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x0f,0xb4,0xf1,0x01,0x05,0x62,0x00]

	image_sample_cd_cl_o v5, v[1:8], s[8:15], s[12:15] dmask:0x0			image_sample_cd_cl_o v5, v[1:8], s[8:15], s[12:15] dmask:0x0
	// CHECK: [0x00,0x00,0xb4,0xf1,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x00,0xb4,0xf1,0x01,0x05,0x62,0x00]

	image_sample_cd_cl_o v5, v[1:16], s[8:15], s[12:15] dmask:0x1			image_sample_cd_cl_o v5, v[1:10], s[8:15], s[12:15] dmask:0x1
	// CHECK: [0x00,0x01,0xb4,0xf1,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x01,0xb4,0xf1,0x01,0x05,0x62,0x00]

	image_sample_cd_cl_o v5, v[1:8], s[8:15], s[12:15] dmask:0x1 unorm			image_sample_cd_cl_o v5, v[1:8], s[8:15], s[12:15] dmask:0x1 unorm
	// CHECK: [0x00,0x11,0xb4,0xf1,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x11,0xb4,0xf1,0x01,0x05,0x62,0x00]

	image_sample_cd_cl_o v5, v[1:8], s[8:15], s[12:15] dmask:0x1 glc			image_sample_cd_cl_o v5, v[1:8], s[8:15], s[12:15] dmask:0x1 glc
	// CHECK: [0x00,0x21,0xb4,0xf1,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x21,0xb4,0xf1,0x01,0x05,0x62,0x00]

	▲ Show 20 Lines • Show All 79 Lines • ▼ Show 20 Lines
	// CHECK: [0x00,0x0e,0xb8,0xf1,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x0e,0xb8,0xf1,0x01,0x05,0x62,0x00]

	image_sample_c_cd_o v[5:8], v[1:8], s[8:15], s[12:15] dmask:0xf			image_sample_c_cd_o v[5:8], v[1:8], s[8:15], s[12:15] dmask:0xf
	// CHECK: [0x00,0x0f,0xb8,0xf1,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x0f,0xb8,0xf1,0x01,0x05,0x62,0x00]

	image_sample_c_cd_o v5, v[1:8], s[8:15], s[12:15] dmask:0x0			image_sample_c_cd_o v5, v[1:8], s[8:15], s[12:15] dmask:0x0
	// CHECK: [0x00,0x00,0xb8,0xf1,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x00,0xb8,0xf1,0x01,0x05,0x62,0x00]

	image_sample_c_cd_o v5, v[1:16], s[8:15], s[12:15] dmask:0x1			image_sample_c_cd_o v5, v[1:10], s[8:15], s[12:15] dmask:0x1
	// CHECK: [0x00,0x01,0xb8,0xf1,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x01,0xb8,0xf1,0x01,0x05,0x62,0x00]

	image_sample_c_cd_o v5, v[1:8], s[8:15], s[12:15] dmask:0x1 unorm			image_sample_c_cd_o v5, v[1:8], s[8:15], s[12:15] dmask:0x1 unorm
	// CHECK: [0x00,0x11,0xb8,0xf1,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x11,0xb8,0xf1,0x01,0x05,0x62,0x00]

	image_sample_c_cd_o v5, v[1:8], s[8:15], s[12:15] dmask:0x1 glc			image_sample_c_cd_o v5, v[1:8], s[8:15], s[12:15] dmask:0x1 glc
	// CHECK: [0x00,0x21,0xb8,0xf1,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x21,0xb8,0xf1,0x01,0x05,0x62,0x00]

	▲ Show 20 Lines • Show All 79 Lines • ▼ Show 20 Lines
	// CHECK: [0x00,0x0e,0xbc,0xf1,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x0e,0xbc,0xf1,0x01,0x05,0x62,0x00]

	image_sample_c_cd_cl_o v[5:8], v[1:8], s[8:15], s[12:15] dmask:0xf			image_sample_c_cd_cl_o v[5:8], v[1:8], s[8:15], s[12:15] dmask:0xf
	// CHECK: [0x00,0x0f,0xbc,0xf1,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x0f,0xbc,0xf1,0x01,0x05,0x62,0x00]

	image_sample_c_cd_cl_o v5, v[1:8], s[8:15], s[12:15] dmask:0x0			image_sample_c_cd_cl_o v5, v[1:8], s[8:15], s[12:15] dmask:0x0
	// CHECK: [0x00,0x00,0xbc,0xf1,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x00,0xbc,0xf1,0x01,0x05,0x62,0x00]

	image_sample_c_cd_cl_o v5, v[1:16], s[8:15], s[12:15] dmask:0x1			image_sample_c_cd_cl_o v5, v[1:10], s[8:15], s[12:15] dmask:0x1
	// CHECK: [0x00,0x01,0xbc,0xf1,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x01,0xbc,0xf1,0x01,0x05,0x62,0x00]

	image_sample_c_cd_cl_o v5, v[1:8], s[8:15], s[12:15] dmask:0x1 unorm			image_sample_c_cd_cl_o v5, v[1:8], s[8:15], s[12:15] dmask:0x1 unorm
	// CHECK: [0x00,0x11,0xbc,0xf1,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x11,0xbc,0xf1,0x01,0x05,0x62,0x00]

	image_sample_c_cd_cl_o v5, v[1:8], s[8:15], s[12:15] dmask:0x1 glc			image_sample_c_cd_cl_o v5, v[1:8], s[8:15], s[12:15] dmask:0x1 glc
	// CHECK: [0x00,0x21,0xbc,0xf1,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x21,0xbc,0xf1,0x01,0x05,0x62,0x00]

	Show All 14 Lines

llvm/test/MC/AMDGPU/gfx9_asm_mimg.s

This file is larger than 256 KB, so syntax highlighting is disabled by default.

	Show First 20 Lines • Show All 1,845 Lines • ▼ Show 20 Lines
	// CHECK: [0x00,0x0f,0x88,0xf0,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x0f,0x88,0xf0,0x01,0x05,0x62,0x00]

	image_sample_d v5, v[1:3], s[8:15], s[12:15] dmask:0x0			image_sample_d v5, v[1:3], s[8:15], s[12:15] dmask:0x0
	// CHECK: [0x00,0x00,0x88,0xf0,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x00,0x88,0xf0,0x01,0x05,0x62,0x00]

	image_sample_d v5, v[1:8], s[8:15], s[12:15] dmask:0x1			image_sample_d v5, v[1:8], s[8:15], s[12:15] dmask:0x1
	// CHECK: [0x00,0x01,0x88,0xf0,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x01,0x88,0xf0,0x01,0x05,0x62,0x00]

	image_sample_d v5, v[1:16], s[8:15], s[12:15] dmask:0x1			image_sample_d v5, v[1:9], s[8:15], s[12:15] dmask:0x1
	// CHECK: [0x00,0x01,0x88,0xf0,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x01,0x88,0xf0,0x01,0x05,0x62,0x00]

	image_sample_d v5, v[1:4], s[8:15], s[12:15] dmask:0x1			image_sample_d v5, v[1:4], s[8:15], s[12:15] dmask:0x1
	// CHECK: [0x00,0x01,0x88,0xf0,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x01,0x88,0xf0,0x01,0x05,0x62,0x00]

	image_sample_d v5, v[1:3], s[8:15], s[12:15] dmask:0x1 unorm			image_sample_d v5, v[1:3], s[8:15], s[12:15] dmask:0x1 unorm
	// CHECK: [0x00,0x11,0x88,0xf0,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x11,0x88,0xf0,0x01,0x05,0x62,0x00]

	▲ Show 20 Lines • Show All 88 Lines • ▼ Show 20 Lines
	// CHECK: [0x00,0x0f,0x8c,0xf0,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x0f,0x8c,0xf0,0x01,0x05,0x62,0x00]

	image_sample_d_cl v5, v[1:4], s[8:15], s[12:15] dmask:0x0			image_sample_d_cl v5, v[1:4], s[8:15], s[12:15] dmask:0x0
	// CHECK: [0x00,0x00,0x8c,0xf0,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x00,0x8c,0xf0,0x01,0x05,0x62,0x00]

	image_sample_d_cl v5, v[1:8], s[8:15], s[12:15] dmask:0x1			image_sample_d_cl v5, v[1:8], s[8:15], s[12:15] dmask:0x1
	// CHECK: [0x00,0x01,0x8c,0xf0,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x01,0x8c,0xf0,0x01,0x05,0x62,0x00]

	image_sample_d_cl v5, v[1:16], s[8:15], s[12:15] dmask:0x1			image_sample_d_cl v5, v[1:10], s[8:15], s[12:15] dmask:0x1
	// CHECK: [0x00,0x01,0x8c,0xf0,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x01,0x8c,0xf0,0x01,0x05,0x62,0x00]

	image_sample_d_cl v5, v[1:4], s[8:15], s[12:15] dmask:0x1 unorm			image_sample_d_cl v5, v[1:4], s[8:15], s[12:15] dmask:0x1 unorm
	// CHECK: [0x00,0x11,0x8c,0xf0,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x11,0x8c,0xf0,0x01,0x05,0x62,0x00]

	image_sample_d_cl v5, v[1:4], s[8:15], s[12:15] dmask:0x1 glc			image_sample_d_cl v5, v[1:4], s[8:15], s[12:15] dmask:0x1 glc
	// CHECK: [0x00,0x21,0x8c,0xf0,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x21,0x8c,0xf0,0x01,0x05,0x62,0x00]

	▲ Show 20 Lines • Show All 694 Lines • ▼ Show 20 Lines
	// CHECK: [0x00,0x0f,0xa8,0xf0,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x0f,0xa8,0xf0,0x01,0x05,0x62,0x00]

	image_sample_c_d v5, v[1:4], s[8:15], s[12:15] dmask:0x0			image_sample_c_d v5, v[1:4], s[8:15], s[12:15] dmask:0x0
	// CHECK: [0x00,0x00,0xa8,0xf0,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x00,0xa8,0xf0,0x01,0x05,0x62,0x00]

	image_sample_c_d v5, v[1:8], s[8:15], s[12:15] dmask:0x1			image_sample_c_d v5, v[1:8], s[8:15], s[12:15] dmask:0x1
	// CHECK: [0x00,0x01,0xa8,0xf0,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x01,0xa8,0xf0,0x01,0x05,0x62,0x00]

	image_sample_c_d v5, v[1:16], s[8:15], s[12:15] dmask:0x1			image_sample_c_d v5, v[1:10], s[8:15], s[12:15] dmask:0x1
	// CHECK: [0x00,0x01,0xa8,0xf0,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x01,0xa8,0xf0,0x01,0x05,0x62,0x00]

	image_sample_c_d v5, v[1:4], s[8:15], s[12:15] dmask:0x1 unorm			image_sample_c_d v5, v[1:4], s[8:15], s[12:15] dmask:0x1 unorm
	// CHECK: [0x00,0x11,0xa8,0xf0,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x11,0xa8,0xf0,0x01,0x05,0x62,0x00]

	image_sample_c_d v5, v[1:4], s[8:15], s[12:15] dmask:0x1 glc			image_sample_c_d v5, v[1:4], s[8:15], s[12:15] dmask:0x1 glc
	// CHECK: [0x00,0x21,0xa8,0xf0,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x21,0xa8,0xf0,0x01,0x05,0x62,0x00]

	▲ Show 20 Lines • Show All 82 Lines • ▼ Show 20 Lines
	// CHECK: [0x00,0x0e,0xac,0xf0,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x0e,0xac,0xf0,0x01,0x05,0x62,0x00]

	image_sample_c_d_cl v[5:8], v[1:8], s[8:15], s[12:15] dmask:0xf			image_sample_c_d_cl v[5:8], v[1:8], s[8:15], s[12:15] dmask:0xf
	// CHECK: [0x00,0x0f,0xac,0xf0,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x0f,0xac,0xf0,0x01,0x05,0x62,0x00]

	image_sample_c_d_cl v5, v[1:8], s[8:15], s[12:15] dmask:0x0			image_sample_c_d_cl v5, v[1:8], s[8:15], s[12:15] dmask:0x0
	// CHECK: [0x00,0x00,0xac,0xf0,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x00,0xac,0xf0,0x01,0x05,0x62,0x00]

	image_sample_c_d_cl v5, v[1:16], s[8:15], s[12:15] dmask:0x1			image_sample_c_d_cl v5, v[1:10], s[8:15], s[12:15] dmask:0x1
	// CHECK: [0x00,0x01,0xac,0xf0,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x01,0xac,0xf0,0x01,0x05,0x62,0x00]

	image_sample_c_d_cl v5, v[1:8], s[8:15], s[12:15] dmask:0x1 unorm			image_sample_c_d_cl v5, v[1:8], s[8:15], s[12:15] dmask:0x1 unorm
	// CHECK: [0x00,0x11,0xac,0xf0,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x11,0xac,0xf0,0x01,0x05,0x62,0x00]

	image_sample_c_d_cl v5, v[1:8], s[8:15], s[12:15] dmask:0x1 glc			image_sample_c_d_cl v5, v[1:8], s[8:15], s[12:15] dmask:0x1 glc
	// CHECK: [0x00,0x21,0xac,0xf0,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x21,0xac,0xf0,0x01,0x05,0x62,0x00]

	▲ Show 20 Lines • Show All 694 Lines • ▼ Show 20 Lines
	// CHECK: [0x00,0x0f,0xc8,0xf0,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x0f,0xc8,0xf0,0x01,0x05,0x62,0x00]

	image_sample_d_o v5, v[1:4], s[8:15], s[12:15] dmask:0x0			image_sample_d_o v5, v[1:4], s[8:15], s[12:15] dmask:0x0
	// CHECK: [0x00,0x00,0xc8,0xf0,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x00,0xc8,0xf0,0x01,0x05,0x62,0x00]

	image_sample_d_o v5, v[1:8], s[8:15], s[12:15] dmask:0x1			image_sample_d_o v5, v[1:8], s[8:15], s[12:15] dmask:0x1
	// CHECK: [0x00,0x01,0xc8,0xf0,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x01,0xc8,0xf0,0x01,0x05,0x62,0x00]

	image_sample_d_o v5, v[1:16], s[8:15], s[12:15] dmask:0x1			image_sample_d_o v5, v[1:10], s[8:15], s[12:15] dmask:0x1
	// CHECK: [0x00,0x01,0xc8,0xf0,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x01,0xc8,0xf0,0x01,0x05,0x62,0x00]

	image_sample_d_o v5, v[1:4], s[8:15], s[12:15] dmask:0x1 unorm			image_sample_d_o v5, v[1:4], s[8:15], s[12:15] dmask:0x1 unorm
	// CHECK: [0x00,0x11,0xc8,0xf0,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x11,0xc8,0xf0,0x01,0x05,0x62,0x00]

	image_sample_d_o v5, v[1:4], s[8:15], s[12:15] dmask:0x1 glc			image_sample_d_o v5, v[1:4], s[8:15], s[12:15] dmask:0x1 glc
	// CHECK: [0x00,0x21,0xc8,0xf0,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x21,0xc8,0xf0,0x01,0x05,0x62,0x00]

	▲ Show 20 Lines • Show All 82 Lines • ▼ Show 20 Lines
	// CHECK: [0x00,0x0e,0xcc,0xf0,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x0e,0xcc,0xf0,0x01,0x05,0x62,0x00]

	image_sample_d_cl_o v[5:8], v[1:8], s[8:15], s[12:15] dmask:0xf			image_sample_d_cl_o v[5:8], v[1:8], s[8:15], s[12:15] dmask:0xf
	// CHECK: [0x00,0x0f,0xcc,0xf0,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x0f,0xcc,0xf0,0x01,0x05,0x62,0x00]

	image_sample_d_cl_o v5, v[1:8], s[8:15], s[12:15] dmask:0x0			image_sample_d_cl_o v5, v[1:8], s[8:15], s[12:15] dmask:0x0
	// CHECK: [0x00,0x00,0xcc,0xf0,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x00,0xcc,0xf0,0x01,0x05,0x62,0x00]

	image_sample_d_cl_o v5, v[1:16], s[8:15], s[12:15] dmask:0x1			image_sample_d_cl_o v5, v[1:10], s[8:15], s[12:15] dmask:0x1
	// CHECK: [0x00,0x01,0xcc,0xf0,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x01,0xcc,0xf0,0x01,0x05,0x62,0x00]

	image_sample_d_cl_o v5, v[1:8], s[8:15], s[12:15] dmask:0x1 unorm			image_sample_d_cl_o v5, v[1:8], s[8:15], s[12:15] dmask:0x1 unorm
	// CHECK: [0x00,0x11,0xcc,0xf0,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x11,0xcc,0xf0,0x01,0x05,0x62,0x00]

	image_sample_d_cl_o v5, v[1:8], s[8:15], s[12:15] dmask:0x1 glc			image_sample_d_cl_o v5, v[1:8], s[8:15], s[12:15] dmask:0x1 glc
	// CHECK: [0x00,0x21,0xcc,0xf0,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x21,0xcc,0xf0,0x01,0x05,0x62,0x00]

	▲ Show 20 Lines • Show All 685 Lines • ▼ Show 20 Lines
	// CHECK: [0x00,0x0e,0xe8,0xf0,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x0e,0xe8,0xf0,0x01,0x05,0x62,0x00]

	image_sample_c_d_o v[5:8], v[1:8], s[8:15], s[12:15] dmask:0xf			image_sample_c_d_o v[5:8], v[1:8], s[8:15], s[12:15] dmask:0xf
	// CHECK: [0x00,0x0f,0xe8,0xf0,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x0f,0xe8,0xf0,0x01,0x05,0x62,0x00]

	image_sample_c_d_o v5, v[1:8], s[8:15], s[12:15] dmask:0x0			image_sample_c_d_o v5, v[1:8], s[8:15], s[12:15] dmask:0x0
	// CHECK: [0x00,0x00,0xe8,0xf0,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x00,0xe8,0xf0,0x01,0x05,0x62,0x00]

	image_sample_c_d_o v5, v[1:16], s[8:15], s[12:15] dmask:0x1			image_sample_c_d_o v5, v[1:10], s[8:15], s[12:15] dmask:0x1
	// CHECK: [0x00,0x01,0xe8,0xf0,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x01,0xe8,0xf0,0x01,0x05,0x62,0x00]

	image_sample_c_d_o v5, v[1:8], s[8:15], s[12:15] dmask:0x1 unorm			image_sample_c_d_o v5, v[1:8], s[8:15], s[12:15] dmask:0x1 unorm
	// CHECK: [0x00,0x11,0xe8,0xf0,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x11,0xe8,0xf0,0x01,0x05,0x62,0x00]

	image_sample_c_d_o v5, v[1:8], s[8:15], s[12:15] dmask:0x1 glc			image_sample_c_d_o v5, v[1:8], s[8:15], s[12:15] dmask:0x1 glc
	// CHECK: [0x00,0x21,0xe8,0xf0,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x21,0xe8,0xf0,0x01,0x05,0x62,0x00]

	▲ Show 20 Lines • Show All 82 Lines • ▼ Show 20 Lines
	// CHECK: [0x00,0x0e,0xec,0xf0,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x0e,0xec,0xf0,0x01,0x05,0x62,0x00]

	image_sample_c_d_cl_o v[5:8], v[1:8], s[8:15], s[12:15] dmask:0xf			image_sample_c_d_cl_o v[5:8], v[1:8], s[8:15], s[12:15] dmask:0xf
	// CHECK: [0x00,0x0f,0xec,0xf0,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x0f,0xec,0xf0,0x01,0x05,0x62,0x00]

	image_sample_c_d_cl_o v5, v[1:8], s[8:15], s[12:15] dmask:0x0			image_sample_c_d_cl_o v5, v[1:8], s[8:15], s[12:15] dmask:0x0
	// CHECK: [0x00,0x00,0xec,0xf0,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x00,0xec,0xf0,0x01,0x05,0x62,0x00]

	image_sample_c_d_cl_o v5, v[1:16], s[8:15], s[12:15] dmask:0x1			image_sample_c_d_cl_o v5, v[1:10], s[8:15], s[12:15] dmask:0x1
	// CHECK: [0x00,0x01,0xec,0xf0,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x01,0xec,0xf0,0x01,0x05,0x62,0x00]

	image_sample_c_d_cl_o v5, v[1:8], s[8:15], s[12:15] dmask:0x1 unorm			image_sample_c_d_cl_o v5, v[1:8], s[8:15], s[12:15] dmask:0x1 unorm
	// CHECK: [0x00,0x11,0xec,0xf0,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x11,0xec,0xf0,0x01,0x05,0x62,0x00]

	image_sample_c_d_cl_o v5, v[1:8], s[8:15], s[12:15] dmask:0x1 glc			image_sample_c_d_cl_o v5, v[1:8], s[8:15], s[12:15] dmask:0x1 glc
	// CHECK: [0x00,0x21,0xec,0xf0,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x21,0xec,0xf0,0x01,0x05,0x62,0x00]

	▲ Show 20 Lines • Show All 2,005 Lines • ▼ Show 20 Lines
	// CHECK: [0x00,0x0f,0xa0,0xf1,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x0f,0xa0,0xf1,0x01,0x05,0x62,0x00]

	image_sample_cd v5, v[1:3], s[8:15], s[12:15] dmask:0x0			image_sample_cd v5, v[1:3], s[8:15], s[12:15] dmask:0x0
	// CHECK: [0x00,0x00,0xa0,0xf1,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x00,0xa0,0xf1,0x01,0x05,0x62,0x00]

	image_sample_cd v5, v[1:8], s[8:15], s[12:15] dmask:0x1			image_sample_cd v5, v[1:8], s[8:15], s[12:15] dmask:0x1
	// CHECK: [0x00,0x01,0xa0,0xf1,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x01,0xa0,0xf1,0x01,0x05,0x62,0x00]

	image_sample_cd v5, v[1:16], s[8:15], s[12:15] dmask:0x1			image_sample_cd v5, v[1:9], s[8:15], s[12:15] dmask:0x1
	// CHECK: [0x00,0x01,0xa0,0xf1,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x01,0xa0,0xf1,0x01,0x05,0x62,0x00]

	image_sample_cd v5, v[1:4], s[8:15], s[12:15] dmask:0x1			image_sample_cd v5, v[1:4], s[8:15], s[12:15] dmask:0x1
	// CHECK: [0x00,0x01,0xa0,0xf1,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x01,0xa0,0xf1,0x01,0x05,0x62,0x00]

	image_sample_cd v5, v[1:3], s[8:15], s[12:15] dmask:0x1 unorm			image_sample_cd v5, v[1:3], s[8:15], s[12:15] dmask:0x1 unorm
	// CHECK: [0x00,0x11,0xa0,0xf1,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x11,0xa0,0xf1,0x01,0x05,0x62,0x00]

	▲ Show 20 Lines • Show All 88 Lines • ▼ Show 20 Lines
	// CHECK: [0x00,0x0f,0xa4,0xf1,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x0f,0xa4,0xf1,0x01,0x05,0x62,0x00]

	image_sample_cd_cl v5, v[1:4], s[8:15], s[12:15] dmask:0x0			image_sample_cd_cl v5, v[1:4], s[8:15], s[12:15] dmask:0x0
	// CHECK: [0x00,0x00,0xa4,0xf1,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x00,0xa4,0xf1,0x01,0x05,0x62,0x00]

	image_sample_cd_cl v5, v[1:8], s[8:15], s[12:15] dmask:0x1			image_sample_cd_cl v5, v[1:8], s[8:15], s[12:15] dmask:0x1
	// CHECK: [0x00,0x01,0xa4,0xf1,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x01,0xa4,0xf1,0x01,0x05,0x62,0x00]

	image_sample_cd_cl v5, v[1:16], s[8:15], s[12:15] dmask:0x1			image_sample_cd_cl v5, v[1:10], s[8:15], s[12:15] dmask:0x1
	// CHECK: [0x00,0x01,0xa4,0xf1,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x01,0xa4,0xf1,0x01,0x05,0x62,0x00]

	image_sample_cd_cl v5, v[1:4], s[8:15], s[12:15] dmask:0x1 unorm			image_sample_cd_cl v5, v[1:4], s[8:15], s[12:15] dmask:0x1 unorm
	// CHECK: [0x00,0x11,0xa4,0xf1,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x11,0xa4,0xf1,0x01,0x05,0x62,0x00]

	image_sample_cd_cl v5, v[1:4], s[8:15], s[12:15] dmask:0x1 glc			image_sample_cd_cl v5, v[1:4], s[8:15], s[12:15] dmask:0x1 glc
	// CHECK: [0x00,0x21,0xa4,0xf1,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x21,0xa4,0xf1,0x01,0x05,0x62,0x00]

	▲ Show 20 Lines • Show All 85 Lines • ▼ Show 20 Lines
	// CHECK: [0x00,0x0f,0xa8,0xf1,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x0f,0xa8,0xf1,0x01,0x05,0x62,0x00]

	image_sample_c_cd v5, v[1:4], s[8:15], s[12:15] dmask:0x0			image_sample_c_cd v5, v[1:4], s[8:15], s[12:15] dmask:0x0
	// CHECK: [0x00,0x00,0xa8,0xf1,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x00,0xa8,0xf1,0x01,0x05,0x62,0x00]

	image_sample_c_cd v5, v[1:8], s[8:15], s[12:15] dmask:0x1			image_sample_c_cd v5, v[1:8], s[8:15], s[12:15] dmask:0x1
	// CHECK: [0x00,0x01,0xa8,0xf1,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x01,0xa8,0xf1,0x01,0x05,0x62,0x00]

	image_sample_c_cd v5, v[1:16], s[8:15], s[12:15] dmask:0x1			image_sample_c_cd v5, v[1:10], s[8:15], s[12:15] dmask:0x1
	// CHECK: [0x00,0x01,0xa8,0xf1,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x01,0xa8,0xf1,0x01,0x05,0x62,0x00]

	image_sample_c_cd v5, v[1:4], s[8:15], s[12:15] dmask:0x1 unorm			image_sample_c_cd v5, v[1:4], s[8:15], s[12:15] dmask:0x1 unorm
	// CHECK: [0x00,0x11,0xa8,0xf1,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x11,0xa8,0xf1,0x01,0x05,0x62,0x00]

	image_sample_c_cd v5, v[1:4], s[8:15], s[12:15] dmask:0x1 glc			image_sample_c_cd v5, v[1:4], s[8:15], s[12:15] dmask:0x1 glc
	// CHECK: [0x00,0x21,0xa8,0xf1,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x21,0xa8,0xf1,0x01,0x05,0x62,0x00]

	▲ Show 20 Lines • Show All 82 Lines • ▼ Show 20 Lines
	// CHECK: [0x00,0x0e,0xac,0xf1,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x0e,0xac,0xf1,0x01,0x05,0x62,0x00]

	image_sample_c_cd_cl v[5:8], v[1:8], s[8:15], s[12:15] dmask:0xf			image_sample_c_cd_cl v[5:8], v[1:8], s[8:15], s[12:15] dmask:0xf
	// CHECK: [0x00,0x0f,0xac,0xf1,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x0f,0xac,0xf1,0x01,0x05,0x62,0x00]

	image_sample_c_cd_cl v5, v[1:8], s[8:15], s[12:15] dmask:0x0			image_sample_c_cd_cl v5, v[1:8], s[8:15], s[12:15] dmask:0x0
	// CHECK: [0x00,0x00,0xac,0xf1,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x00,0xac,0xf1,0x01,0x05,0x62,0x00]

	image_sample_c_cd_cl v5, v[1:16], s[8:15], s[12:15] dmask:0x1			image_sample_c_cd_cl v5, v[1:10], s[8:15], s[12:15] dmask:0x1
	// CHECK: [0x00,0x01,0xac,0xf1,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x01,0xac,0xf1,0x01,0x05,0x62,0x00]

	image_sample_c_cd_cl v5, v[1:8], s[8:15], s[12:15] dmask:0x1 unorm			image_sample_c_cd_cl v5, v[1:8], s[8:15], s[12:15] dmask:0x1 unorm
	// CHECK: [0x00,0x11,0xac,0xf1,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x11,0xac,0xf1,0x01,0x05,0x62,0x00]

	image_sample_c_cd_cl v5, v[1:8], s[8:15], s[12:15] dmask:0x1 glc			image_sample_c_cd_cl v5, v[1:8], s[8:15], s[12:15] dmask:0x1 glc
	// CHECK: [0x00,0x21,0xac,0xf1,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x21,0xac,0xf1,0x01,0x05,0x62,0x00]

	▲ Show 20 Lines • Show All 85 Lines • ▼ Show 20 Lines
	// CHECK: [0x00,0x0f,0xb0,0xf1,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x0f,0xb0,0xf1,0x01,0x05,0x62,0x00]

	image_sample_cd_o v5, v[1:4], s[8:15], s[12:15] dmask:0x0			image_sample_cd_o v5, v[1:4], s[8:15], s[12:15] dmask:0x0
	// CHECK: [0x00,0x00,0xb0,0xf1,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x00,0xb0,0xf1,0x01,0x05,0x62,0x00]

	image_sample_cd_o v5, v[1:8], s[8:15], s[12:15] dmask:0x1			image_sample_cd_o v5, v[1:8], s[8:15], s[12:15] dmask:0x1
	// CHECK: [0x00,0x01,0xb0,0xf1,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x01,0xb0,0xf1,0x01,0x05,0x62,0x00]

	image_sample_cd_o v5, v[1:16], s[8:15], s[12:15] dmask:0x1			image_sample_cd_o v5, v[1:10], s[8:15], s[12:15] dmask:0x1
	// CHECK: [0x00,0x01,0xb0,0xf1,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x01,0xb0,0xf1,0x01,0x05,0x62,0x00]

	image_sample_cd_o v5, v[1:4], s[8:15], s[12:15] dmask:0x1 unorm			image_sample_cd_o v5, v[1:4], s[8:15], s[12:15] dmask:0x1 unorm
	// CHECK: [0x00,0x11,0xb0,0xf1,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x11,0xb0,0xf1,0x01,0x05,0x62,0x00]

	image_sample_cd_o v5, v[1:4], s[8:15], s[12:15] dmask:0x1 glc			image_sample_cd_o v5, v[1:4], s[8:15], s[12:15] dmask:0x1 glc
	// CHECK: [0x00,0x21,0xb0,0xf1,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x21,0xb0,0xf1,0x01,0x05,0x62,0x00]

	▲ Show 20 Lines • Show All 82 Lines • ▼ Show 20 Lines
	// CHECK: [0x00,0x0e,0xb4,0xf1,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x0e,0xb4,0xf1,0x01,0x05,0x62,0x00]

	image_sample_cd_cl_o v[5:8], v[1:8], s[8:15], s[12:15] dmask:0xf			image_sample_cd_cl_o v[5:8], v[1:8], s[8:15], s[12:15] dmask:0xf
	// CHECK: [0x00,0x0f,0xb4,0xf1,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x0f,0xb4,0xf1,0x01,0x05,0x62,0x00]

	image_sample_cd_cl_o v5, v[1:8], s[8:15], s[12:15] dmask:0x0			image_sample_cd_cl_o v5, v[1:8], s[8:15], s[12:15] dmask:0x0
	// CHECK: [0x00,0x00,0xb4,0xf1,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x00,0xb4,0xf1,0x01,0x05,0x62,0x00]

	image_sample_cd_cl_o v5, v[1:16], s[8:15], s[12:15] dmask:0x1			image_sample_cd_cl_o v5, v[1:10], s[8:15], s[12:15] dmask:0x1
	// CHECK: [0x00,0x01,0xb4,0xf1,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x01,0xb4,0xf1,0x01,0x05,0x62,0x00]

	image_sample_cd_cl_o v5, v[1:8], s[8:15], s[12:15] dmask:0x1 unorm			image_sample_cd_cl_o v5, v[1:8], s[8:15], s[12:15] dmask:0x1 unorm
	// CHECK: [0x00,0x11,0xb4,0xf1,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x11,0xb4,0xf1,0x01,0x05,0x62,0x00]

	image_sample_cd_cl_o v5, v[1:8], s[8:15], s[12:15] dmask:0x1 glc			image_sample_cd_cl_o v5, v[1:8], s[8:15], s[12:15] dmask:0x1 glc
	// CHECK: [0x00,0x21,0xb4,0xf1,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x21,0xb4,0xf1,0x01,0x05,0x62,0x00]

	▲ Show 20 Lines • Show All 82 Lines • ▼ Show 20 Lines
	// CHECK: [0x00,0x0e,0xb8,0xf1,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x0e,0xb8,0xf1,0x01,0x05,0x62,0x00]

	image_sample_c_cd_o v[5:8], v[1:8], s[8:15], s[12:15] dmask:0xf			image_sample_c_cd_o v[5:8], v[1:8], s[8:15], s[12:15] dmask:0xf
	// CHECK: [0x00,0x0f,0xb8,0xf1,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x0f,0xb8,0xf1,0x01,0x05,0x62,0x00]

	image_sample_c_cd_o v5, v[1:8], s[8:15], s[12:15] dmask:0x0			image_sample_c_cd_o v5, v[1:8], s[8:15], s[12:15] dmask:0x0
	// CHECK: [0x00,0x00,0xb8,0xf1,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x00,0xb8,0xf1,0x01,0x05,0x62,0x00]

	image_sample_c_cd_o v5, v[1:16], s[8:15], s[12:15] dmask:0x1			image_sample_c_cd_o v5, v[1:10], s[8:15], s[12:15] dmask:0x1
	// CHECK: [0x00,0x01,0xb8,0xf1,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x01,0xb8,0xf1,0x01,0x05,0x62,0x00]

	image_sample_c_cd_o v5, v[1:8], s[8:15], s[12:15] dmask:0x1 unorm			image_sample_c_cd_o v5, v[1:8], s[8:15], s[12:15] dmask:0x1 unorm
	// CHECK: [0x00,0x11,0xb8,0xf1,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x11,0xb8,0xf1,0x01,0x05,0x62,0x00]

	image_sample_c_cd_o v5, v[1:8], s[8:15], s[12:15] dmask:0x1 glc			image_sample_c_cd_o v5, v[1:8], s[8:15], s[12:15] dmask:0x1 glc
	// CHECK: [0x00,0x21,0xb8,0xf1,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x21,0xb8,0xf1,0x01,0x05,0x62,0x00]

	▲ Show 20 Lines • Show All 82 Lines • ▼ Show 20 Lines
	// CHECK: [0x00,0x0e,0xbc,0xf1,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x0e,0xbc,0xf1,0x01,0x05,0x62,0x00]

	image_sample_c_cd_cl_o v[5:8], v[1:8], s[8:15], s[12:15] dmask:0xf			image_sample_c_cd_cl_o v[5:8], v[1:8], s[8:15], s[12:15] dmask:0xf
	// CHECK: [0x00,0x0f,0xbc,0xf1,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x0f,0xbc,0xf1,0x01,0x05,0x62,0x00]

	image_sample_c_cd_cl_o v5, v[1:8], s[8:15], s[12:15] dmask:0x0			image_sample_c_cd_cl_o v5, v[1:8], s[8:15], s[12:15] dmask:0x0
	// CHECK: [0x00,0x00,0xbc,0xf1,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x00,0xbc,0xf1,0x01,0x05,0x62,0x00]

	image_sample_c_cd_cl_o v5, v[1:16], s[8:15], s[12:15] dmask:0x1			image_sample_c_cd_cl_o v5, v[1:10], s[8:15], s[12:15] dmask:0x1
	// CHECK: [0x00,0x01,0xbc,0xf1,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x01,0xbc,0xf1,0x01,0x05,0x62,0x00]

	image_sample_c_cd_cl_o v5, v[1:8], s[8:15], s[12:15] dmask:0x1 unorm			image_sample_c_cd_cl_o v5, v[1:8], s[8:15], s[12:15] dmask:0x1 unorm
	// CHECK: [0x00,0x11,0xbc,0xf1,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x11,0xbc,0xf1,0x01,0x05,0x62,0x00]

	image_sample_c_cd_cl_o v5, v[1:8], s[8:15], s[12:15] dmask:0x1 glc			image_sample_c_cd_cl_o v5, v[1:8], s[8:15], s[12:15] dmask:0x1 glc
	// CHECK: [0x00,0x21,0xbc,0xf1,0x01,0x05,0x62,0x00]			// CHECK: [0x00,0x21,0xbc,0xf1,0x01,0x05,0x62,0x00]

	Show All 17 Lines

llvm/test/MC/Disassembler/AMDGPU/gfx1030_new.txt

	Show First 20 Lines • Show All 69 Lines • ▼ Show 20 Lines
	0x01,0x05,0x00,0x0c			0x01,0x05,0x00,0x0c

	# GFX10: v_fmac_legacy_f32_e64 v0, \|v1\|, -v2			# GFX10: v_fmac_legacy_f32_e64 v0, \|v1\|, -v2
	0x00,0x01,0x06,0xd5,0x01,0x05,0x02,0x40			0x00,0x01,0x06,0xd5,0x01,0x05,0x02,0x40

	# GFX10: v_fmac_legacy_f32_e64 v0, s1, 2.0			# GFX10: v_fmac_legacy_f32_e64 v0, s1, 2.0
	0x00,0x00,0x06,0xd5,0x01,0xe8,0x01,0x00			0x00,0x00,0x06,0xd5,0x01,0xe8,0x01,0x00

	# GFX10: image_bvh_intersect_ray v[4:7], v[9:24], s[4:7]			# GFX10: image_bvh_intersect_ray v[4:7], v[9:19], s[4:7]
	0x01,0x9f,0x98,0xf1,0x09,0x04,0x01,0x00			0x01,0x9f,0x98,0xf1,0x09,0x04,0x01,0x00

	# GFX10: image_bvh_intersect_ray v[4:7], v[9:16], s[4:7] a16			# GFX10: image_bvh_intersect_ray v[4:7], v[9:16], s[4:7] a16
	0x01,0x9f,0x98,0xf1,0x09,0x04,0x01,0x40			0x01,0x9f,0x98,0xf1,0x09,0x04,0x01,0x40

	# GFX10: image_bvh64_intersect_ray v[4:7], v[9:24], s[4:7]			# GFX10: image_bvh64_intersect_ray v[4:7], v[9:20], s[4:7]
	0x01,0x9f,0x9c,0xf1,0x09,0x04,0x01,0x00			0x01,0x9f,0x9c,0xf1,0x09,0x04,0x01,0x00

	# GFX10: image_bvh64_intersect_ray v[4:7], v[9:24], s[4:7] a16			# GFX10: image_bvh64_intersect_ray v[4:7], v[9:17], s[4:7] a16
	0x01,0x9f,0x9c,0xf1,0x09,0x04,0x01,0x40			0x01,0x9f,0x9c,0xf1,0x09,0x04,0x01,0x40

	# GFX10: image_bvh_intersect_ray v[39:42], [v50, v46, v23, v17, v16, v15, v21, v20, v19, v37, v40], s[12:15]			# GFX10: image_bvh_intersect_ray v[39:42], [v50, v46, v23, v17, v16, v15, v21, v20, v19, v37, v40], s[12:15]
	0x07,0x9f,0x98,0xf1,0x32,0x27,0x03,0x00,0x2e,0x17,0x11,0x10,0x0f,0x15,0x14,0x13,0x25,0x28,0x00,0x00			0x07,0x9f,0x98,0xf1,0x32,0x27,0x03,0x00,0x2e,0x17,0x11,0x10,0x0f,0x15,0x14,0x13,0x25,0x28,0x00,0x00

	# GFX10: image_bvh_intersect_ray v[39:42], [v50, v46, v23, v17, v16, v15, v21, v20], s[12:15] a16			# GFX10: image_bvh_intersect_ray v[39:42], [v50, v46, v23, v17, v16, v15, v21, v20], s[12:15] a16
	0x05,0x9f,0x98,0xf1,0x32,0x27,0x03,0x40,0x2e,0x17,0x11,0x10,0x0f,0x15,0x14,0x00			0x05,0x9f,0x98,0xf1,0x32,0x27,0x03,0x40,0x2e,0x17,0x11,0x10,0x0f,0x15,0x14,0x00

	▲ Show 20 Lines • Show All 168 Lines • Show Last 20 Lines

llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_mimg.txt

	Show First 20 Lines • Show All 1,242 Lines • ▼ Show 20 Lines
	0x98,0x03,0x50,0xf0,0xfd,0x01,0x03,0x00			0x98,0x03,0x50,0xf0,0xfd,0x01,0x03,0x00

	# GFX11: image_atomic_xor v[1:2], v[2:3], s[96:103] dmask:0x3 dim:SQ_RSRC_IMG_2D_MSAA unorm a16 ; encoding: [0x98,0x03,0x51,0xf0,0x02,0x01,0x18,0x00]			# GFX11: image_atomic_xor v[1:2], v[2:3], s[96:103] dmask:0x3 dim:SQ_RSRC_IMG_2D_MSAA unorm a16 ; encoding: [0x98,0x03,0x51,0xf0,0x02,0x01,0x18,0x00]
	0x98,0x03,0x51,0xf0,0x02,0x01,0x18,0x00			0x98,0x03,0x51,0xf0,0x02,0x01,0x18,0x00

	# GFX11: image_atomic_xor v[254:255], v[254:255], ttmp[8:15] dmask:0x3 dim:SQ_RSRC_IMG_2D_MSAA unorm glc slc dlc a16 lwe ; encoding: [0x98,0x73,0x51,0xf0,0xfe,0xfe,0x5d,0x00]			# GFX11: image_atomic_xor v[254:255], v[254:255], ttmp[8:15] dmask:0x3 dim:SQ_RSRC_IMG_2D_MSAA unorm glc slc dlc a16 lwe ; encoding: [0x98,0x73,0x51,0xf0,0xfe,0xfe,0x5d,0x00]
	0x98,0x73,0x51,0xf0,0xfe,0xfe,0x5d,0x00			0x98,0x73,0x51,0xf0,0xfe,0xfe,0x5d,0x00

	# GFX11: image_bvh64_intersect_ray v[5:8], v[1:16], s[8:11] ; encoding: [0x80,0x8f,0x68,0xf0,0x01,0x05,0x02,0x00]			# GFX11: image_bvh64_intersect_ray v[5:8], v[1:12], s[8:11] ; encoding: [0x80,0x8f,0x68,0xf0,0x01,0x05,0x02,0x00]
	0x80,0x8f,0x68,0xf0,0x01,0x05,0x02,0x00			0x80,0x8f,0x68,0xf0,0x01,0x05,0x02,0x00

	# GFX11: image_bvh64_intersect_ray v[5:8], v[240:255], s[8:11] ; encoding: [0x80,0x8f,0x68,0xf0,0xf0,0x05,0x02,0x00]			# GFX11: image_bvh64_intersect_ray v[5:8], v[240:251], s[8:11] ; encoding: [0x80,0x8f,0x68,0xf0,0xf0,0x05,0x02,0x00]
	0x80,0x8f,0x68,0xf0,0xf0,0x05,0x02,0x00			0x80,0x8f,0x68,0xf0,0xf0,0x05,0x02,0x00

	# GFX11: image_bvh64_intersect_ray v[5:8], v[1:16], s[100:103] a16 ; encoding: [0x80,0x8f,0x69,0xf0,0x01,0x05,0x19,0x00]			# GFX11: image_bvh64_intersect_ray v[5:8], v[1:9], s[100:103] a16 ; encoding: [0x80,0x8f,0x69,0xf0,0x01,0x05,0x19,0x00]
	0x80,0x8f,0x69,0xf0,0x01,0x05,0x19,0x00			0x80,0x8f,0x69,0xf0,0x01,0x05,0x19,0x00

	# GFX11: image_bvh64_intersect_ray v[252:255], v[240:255], ttmp[12:15] a16 ; encoding: [0x80,0x8f,0x69,0xf0,0xf0,0xfc,0x1e,0x00]			# GFX11: image_bvh64_intersect_ray v[252:255], v[240:248], ttmp[12:15] a16 ; encoding: [0x80,0x8f,0x69,0xf0,0xf0,0xfc,0x1e,0x00]
	0x80,0x8f,0x69,0xf0,0xf0,0xfc,0x1e,0x00			0x80,0x8f,0x69,0xf0,0xf0,0xfc,0x1e,0x00

	# GFX11: image_bvh_intersect_ray v[5:8], v[1:16], s[8:11] ; encoding: [0x80,0x8f,0x64,0xf0,0x01,0x05,0x02,0x00]			# GFX11: image_bvh_intersect_ray v[5:8], v[1:11], s[8:11] ; encoding: [0x80,0x8f,0x64,0xf0,0x01,0x05,0x02,0x00]
	0x80,0x8f,0x64,0xf0,0x01,0x05,0x02,0x00			0x80,0x8f,0x64,0xf0,0x01,0x05,0x02,0x00

	# GFX11: image_bvh_intersect_ray v[5:8], v[240:255], s[8:11] ; encoding: [0x80,0x8f,0x64,0xf0,0xf0,0x05,0x02,0x00]			# GFX11: image_bvh_intersect_ray v[5:8], v[240:250], s[8:11] ; encoding: [0x80,0x8f,0x64,0xf0,0xf0,0x05,0x02,0x00]
	0x80,0x8f,0x64,0xf0,0xf0,0x05,0x02,0x00			0x80,0x8f,0x64,0xf0,0xf0,0x05,0x02,0x00

	# GFX11: image_bvh_intersect_ray v[5:8], v[1:8], s[100:103] a16 ; encoding: [0x80,0x8f,0x65,0xf0,0x01,0x05,0x19,0x00]			# GFX11: image_bvh_intersect_ray v[5:8], v[1:8], s[100:103] a16 ; encoding: [0x80,0x8f,0x65,0xf0,0x01,0x05,0x19,0x00]
	0x80,0x8f,0x65,0xf0,0x01,0x05,0x19,0x00			0x80,0x8f,0x65,0xf0,0x01,0x05,0x19,0x00

	# GFX11: image_bvh_intersect_ray v[252:255], v[248:255], ttmp[12:15] a16 ; encoding: [0x80,0x8f,0x65,0xf0,0xf8,0xfc,0x1e,0x00]			# GFX11: image_bvh_intersect_ray v[252:255], v[248:255], ttmp[12:15] a16 ; encoding: [0x80,0x8f,0x65,0xf0,0xf8,0xfc,0x1e,0x00]
	0x80,0x8f,0x65,0xf0,0xf8,0xfc,0x1e,0x00			0x80,0x8f,0x65,0xf0,0xf8,0xfc,0x1e,0x00

	▲ Show 20 Lines • Show All 1,984 Lines • ▼ Show 20 Lines
	0x00,0x03,0x84,0xf0,0xfc,0x05,0x02,0x0c			0x00,0x03,0x84,0xf0,0xfc,0x05,0x02,0x0c

	# GFX11: image_sample_c_d v[5:6], v[1:4], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_1D a16 ; encoding: [0x00,0x03,0x85,0xf0,0x01,0x05,0x02,0x0c]			# GFX11: image_sample_c_d v[5:6], v[1:4], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_1D a16 ; encoding: [0x00,0x03,0x85,0xf0,0x01,0x05,0x02,0x0c]
	0x00,0x03,0x85,0xf0,0x01,0x05,0x02,0x0c			0x00,0x03,0x85,0xf0,0x01,0x05,0x02,0x0c

	# GFX11: image_sample_c_d v[5:6], v[252:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_1D a16 ; encoding: [0x00,0x03,0x85,0xf0,0xfc,0x05,0x02,0x0c]			# GFX11: image_sample_c_d v[5:6], v[252:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_1D a16 ; encoding: [0x00,0x03,0x85,0xf0,0xfc,0x05,0x02,0x0c]
	0x00,0x03,0x85,0xf0,0xfc,0x05,0x02,0x0c			0x00,0x03,0x85,0xf0,0xfc,0x05,0x02,0x0c

	# GFX11: image_sample_c_d v[5:6], v[1:16], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D ; encoding: [0x08,0x03,0x84,0xf0,0x01,0x05,0x02,0x0c]			# GFX11: image_sample_c_d v[5:6], v[1:10], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D ; encoding: [0x08,0x03,0x84,0xf0,0x01,0x05,0x02,0x0c]
	0x08,0x03,0x84,0xf0,0x01,0x05,0x02,0x0c			0x08,0x03,0x84,0xf0,0x01,0x05,0x02,0x0c

	# GFX11: image_sample_c_d v[5:6], v[240:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D ; encoding: [0x08,0x03,0x84,0xf0,0xf0,0x05,0x02,0x0c]			# GFX11: image_sample_c_d v[5:6], v[240:249], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D ; encoding: [0x08,0x03,0x84,0xf0,0xf0,0x05,0x02,0x0c]
	0x08,0x03,0x84,0xf0,0xf0,0x05,0x02,0x0c			0x08,0x03,0x84,0xf0,0xf0,0x05,0x02,0x0c

	# GFX11: image_sample_c_d v[5:6], v[1:16], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D a16 ; encoding: [0x08,0x03,0x85,0xf0,0x01,0x05,0x02,0x0c]			# GFX11: image_sample_c_d v[5:6], v[1:9], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D a16 ; encoding: [0x08,0x03,0x85,0xf0,0x01,0x05,0x02,0x0c]
	0x08,0x03,0x85,0xf0,0x01,0x05,0x02,0x0c			0x08,0x03,0x85,0xf0,0x01,0x05,0x02,0x0c

	# GFX11: image_sample_c_d v[5:6], v[240:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D a16 ; encoding: [0x08,0x03,0x85,0xf0,0xf0,0x05,0x02,0x0c]			# GFX11: image_sample_c_d v[5:6], v[240:248], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D a16 ; encoding: [0x08,0x03,0x85,0xf0,0xf0,0x05,0x02,0x0c]
	0x08,0x03,0x85,0xf0,0xf0,0x05,0x02,0x0c			0x08,0x03,0x85,0xf0,0xf0,0x05,0x02,0x0c

	# GFX11: image_sample_c_d v[5:6], v[1:7], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_2D ; encoding: [0x04,0x03,0x84,0xf0,0x01,0x05,0x02,0x0c]			# GFX11: image_sample_c_d v[5:6], v[1:7], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_2D ; encoding: [0x04,0x03,0x84,0xf0,0x01,0x05,0x02,0x0c]
	0x04,0x03,0x84,0xf0,0x01,0x05,0x02,0x0c			0x04,0x03,0x84,0xf0,0x01,0x05,0x02,0x0c

	# GFX11: image_sample_c_d v[254:255], v[249:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_2D ; encoding: [0x04,0x03,0x84,0xf0,0xf9,0xfe,0x02,0x0c]			# GFX11: image_sample_c_d v[254:255], v[249:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_2D ; encoding: [0x04,0x03,0x84,0xf0,0xf9,0xfe,0x02,0x0c]
	0x04,0x03,0x84,0xf0,0xf9,0xfe,0x02,0x0c			0x04,0x03,0x84,0xf0,0xf9,0xfe,0x02,0x0c

	▲ Show 20 Lines • Show All 46 Lines • ▼ Show 20 Lines
	0x00,0x03,0x10,0xf1,0xfb,0x05,0x02,0x0c			0x00,0x03,0x10,0xf1,0xfb,0x05,0x02,0x0c

	# GFX11: image_sample_c_d_cl v[5:6], v[1:4], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_1D a16 ; encoding: [0x00,0x03,0x11,0xf1,0x01,0x05,0x02,0x0c]			# GFX11: image_sample_c_d_cl v[5:6], v[1:4], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_1D a16 ; encoding: [0x00,0x03,0x11,0xf1,0x01,0x05,0x02,0x0c]
	0x00,0x03,0x11,0xf1,0x01,0x05,0x02,0x0c			0x00,0x03,0x11,0xf1,0x01,0x05,0x02,0x0c

	# GFX11: image_sample_c_d_cl v[5:6], v[252:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_1D a16 ; encoding: [0x00,0x03,0x11,0xf1,0xfc,0x05,0x02,0x0c]			# GFX11: image_sample_c_d_cl v[5:6], v[252:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_1D a16 ; encoding: [0x00,0x03,0x11,0xf1,0xfc,0x05,0x02,0x0c]
	0x00,0x03,0x11,0xf1,0xfc,0x05,0x02,0x0c			0x00,0x03,0x11,0xf1,0xfc,0x05,0x02,0x0c

	# GFX11: image_sample_c_d_cl v[5:6], v[1:16], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D ; encoding: [0x08,0x03,0x10,0xf1,0x01,0x05,0x02,0x0c]			# GFX11: image_sample_c_d_cl v[5:6], v[1:11], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D ; encoding: [0x08,0x03,0x10,0xf1,0x01,0x05,0x02,0x0c]
	0x08,0x03,0x10,0xf1,0x01,0x05,0x02,0x0c			0x08,0x03,0x10,0xf1,0x01,0x05,0x02,0x0c

	# GFX11: image_sample_c_d_cl v[5:6], v[240:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D ; encoding: [0x08,0x03,0x10,0xf1,0xf0,0x05,0x02,0x0c]			# GFX11: image_sample_c_d_cl v[5:6], v[240:250], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D ; encoding: [0x08,0x03,0x10,0xf1,0xf0,0x05,0x02,0x0c]
	0x08,0x03,0x10,0xf1,0xf0,0x05,0x02,0x0c			0x08,0x03,0x10,0xf1,0xf0,0x05,0x02,0x0c

	# GFX11: image_sample_c_d_cl v[5:6], v[1:16], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D a16 ; encoding: [0x08,0x03,0x11,0xf1,0x01,0x05,0x02,0x0c]			# GFX11: image_sample_c_d_cl v[5:6], v[1:9], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D a16 ; encoding: [0x08,0x03,0x11,0xf1,0x01,0x05,0x02,0x0c]
	0x08,0x03,0x11,0xf1,0x01,0x05,0x02,0x0c			0x08,0x03,0x11,0xf1,0x01,0x05,0x02,0x0c

	# GFX11: image_sample_c_d_cl v[5:6], v[240:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D a16 ; encoding: [0x08,0x03,0x11,0xf1,0xf0,0x05,0x02,0x0c]			# GFX11: image_sample_c_d_cl v[5:6], v[240:248], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D a16 ; encoding: [0x08,0x03,0x11,0xf1,0xf0,0x05,0x02,0x0c]
	0x08,0x03,0x11,0xf1,0xf0,0x05,0x02,0x0c			0x08,0x03,0x11,0xf1,0xf0,0x05,0x02,0x0c

	# GFX11: image_sample_c_d_cl v[5:6], v[1:8], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_2D ; encoding: [0x04,0x03,0x10,0xf1,0x01,0x05,0x02,0x0c]			# GFX11: image_sample_c_d_cl v[5:6], v[1:8], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_2D ; encoding: [0x04,0x03,0x10,0xf1,0x01,0x05,0x02,0x0c]
	0x04,0x03,0x10,0xf1,0x01,0x05,0x02,0x0c			0x04,0x03,0x10,0xf1,0x01,0x05,0x02,0x0c

	# GFX11: image_sample_c_d_cl v[254:255], v[248:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_2D ; encoding: [0x04,0x03,0x10,0xf1,0xf8,0xfe,0x02,0x0c]			# GFX11: image_sample_c_d_cl v[254:255], v[248:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_2D ; encoding: [0x04,0x03,0x10,0xf1,0xf8,0xfe,0x02,0x0c]
	0x04,0x03,0x10,0xf1,0xf8,0xfe,0x02,0x0c			0x04,0x03,0x10,0xf1,0xf8,0xfe,0x02,0x0c

	# GFX11: image_sample_c_d_cl v[5:7], v[1:7], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_2D a16 tfe ; encoding: [0x04,0x03,0x11,0xf1,0x01,0x05,0x22,0x0c]			# GFX11: image_sample_c_d_cl v[5:7], v[1:7], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_2D a16 tfe ; encoding: [0x04,0x03,0x11,0xf1,0x01,0x05,0x22,0x0c]
	0x04,0x03,0x11,0xf1,0x01,0x05,0x22,0x0c			0x04,0x03,0x11,0xf1,0x01,0x05,0x22,0x0c

	# GFX11: image_sample_c_d_cl v[253:255], v[249:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_2D a16 tfe ; encoding: [0x04,0x03,0x11,0xf1,0xf9,0xfd,0x22,0x0c]			# GFX11: image_sample_c_d_cl v[253:255], v[249:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_2D a16 tfe ; encoding: [0x04,0x03,0x11,0xf1,0xf9,0xfd,0x22,0x0c]
	0x04,0x03,0x11,0xf1,0xf9,0xfd,0x22,0x0c			0x04,0x03,0x11,0xf1,0xf9,0xfd,0x22,0x0c

	# GFX11: image_sample_c_d_cl v5, v[1:16], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_CUBE d16 ; encoding: [0x0c,0x03,0x12,0xf1,0x01,0x05,0x02,0x0c]			# GFX11: image_sample_c_d_cl v5, v[1:9], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_CUBE d16 ; encoding: [0x0c,0x03,0x12,0xf1,0x01,0x05,0x02,0x0c]
	0x0c,0x03,0x12,0xf1,0x01,0x05,0x02,0x0c			0x0c,0x03,0x12,0xf1,0x01,0x05,0x02,0x0c

	# GFX11: image_sample_c_d_cl v255, v[240:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_CUBE d16 ; encoding: [0x0c,0x03,0x12,0xf1,0xf0,0xff,0x02,0x0c]			# GFX11: image_sample_c_d_cl v255, v[240:248], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_CUBE d16 ; encoding: [0x0c,0x03,0x12,0xf1,0xf0,0xff,0x02,0x0c]
	0x0c,0x03,0x12,0xf1,0xf0,0xff,0x02,0x0c			0x0c,0x03,0x12,0xf1,0xf0,0xff,0x02,0x0c

	# GFX11: image_sample_c_d_cl v[5:6], v[1:7], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_CUBE a16 tfe d16 ; encoding: [0x0c,0x03,0x13,0xf1,0x01,0x05,0x22,0x0c]			# GFX11: image_sample_c_d_cl v[5:6], v[1:7], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_CUBE a16 tfe d16 ; encoding: [0x0c,0x03,0x13,0xf1,0x01,0x05,0x22,0x0c]
	0x0c,0x03,0x13,0xf1,0x01,0x05,0x22,0x0c			0x0c,0x03,0x13,0xf1,0x01,0x05,0x22,0x0c

	# GFX11: image_sample_c_d_cl v[254:255], v[249:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_CUBE a16 tfe d16 ; encoding: [0x0c,0x03,0x13,0xf1,0xf9,0xfe,0x22,0x0c]			# GFX11: image_sample_c_d_cl v[254:255], v[249:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_CUBE a16 tfe d16 ; encoding: [0x0c,0x03,0x13,0xf1,0xf9,0xfe,0x22,0x0c]
	0x0c,0x03,0x13,0xf1,0xf9,0xfe,0x22,0x0c			0x0c,0x03,0x13,0xf1,0xf9,0xfe,0x22,0x0c

	# GFX11: image_sample_c_d_cl v5, v[1:6], s[8:15], s[12:15] dmask:0x4 dim:SQ_RSRC_IMG_1D_ARRAY ; encoding: [0x10,0x04,0x10,0xf1,0x01,0x05,0x02,0x0c]			# GFX11: image_sample_c_d_cl v5, v[1:6], s[8:15], s[12:15] dmask:0x4 dim:SQ_RSRC_IMG_1D_ARRAY ; encoding: [0x10,0x04,0x10,0xf1,0x01,0x05,0x02,0x0c]
	0x10,0x04,0x10,0xf1,0x01,0x05,0x02,0x0c			0x10,0x04,0x10,0xf1,0x01,0x05,0x02,0x0c

	# GFX11: image_sample_c_d_cl v255, v[250:255], s[8:15], s[12:15] dmask:0x4 dim:SQ_RSRC_IMG_1D_ARRAY ; encoding: [0x10,0x04,0x10,0xf1,0xfa,0xff,0x02,0x0c]			# GFX11: image_sample_c_d_cl v255, v[250:255], s[8:15], s[12:15] dmask:0x4 dim:SQ_RSRC_IMG_1D_ARRAY ; encoding: [0x10,0x04,0x10,0xf1,0xfa,0xff,0x02,0x0c]
	0x10,0x04,0x10,0xf1,0xfa,0xff,0x02,0x0c			0x10,0x04,0x10,0xf1,0xfa,0xff,0x02,0x0c

	# GFX11: image_sample_c_d_cl v[5:6], v[1:5], s[8:15], s[12:15] dmask:0x4 dim:SQ_RSRC_IMG_1D_ARRAY a16 tfe ; encoding: [0x10,0x04,0x11,0xf1,0x01,0x05,0x22,0x0c]			# GFX11: image_sample_c_d_cl v[5:6], v[1:5], s[8:15], s[12:15] dmask:0x4 dim:SQ_RSRC_IMG_1D_ARRAY a16 tfe ; encoding: [0x10,0x04,0x11,0xf1,0x01,0x05,0x22,0x0c]
	0x10,0x04,0x11,0xf1,0x01,0x05,0x22,0x0c			0x10,0x04,0x11,0xf1,0x01,0x05,0x22,0x0c

	# GFX11: image_sample_c_d_cl v[254:255], v[251:255], s[8:15], s[12:15] dmask:0x4 dim:SQ_RSRC_IMG_1D_ARRAY a16 tfe ; encoding: [0x10,0x04,0x11,0xf1,0xfb,0xfe,0x22,0x0c]			# GFX11: image_sample_c_d_cl v[254:255], v[251:255], s[8:15], s[12:15] dmask:0x4 dim:SQ_RSRC_IMG_1D_ARRAY a16 tfe ; encoding: [0x10,0x04,0x11,0xf1,0xfb,0xfe,0x22,0x0c]
	0x10,0x04,0x11,0xf1,0xfb,0xfe,0x22,0x0c			0x10,0x04,0x11,0xf1,0xfb,0xfe,0x22,0x0c

	# GFX11: image_sample_c_d_cl v5, v[1:16], s[8:15], s[12:15] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY d16 ; encoding: [0x14,0x04,0x12,0xf1,0x01,0x05,0x02,0x0c]			# GFX11: image_sample_c_d_cl v5, v[1:9], s[8:15], s[12:15] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY d16 ; encoding: [0x14,0x04,0x12,0xf1,0x01,0x05,0x02,0x0c]
	0x14,0x04,0x12,0xf1,0x01,0x05,0x02,0x0c			0x14,0x04,0x12,0xf1,0x01,0x05,0x02,0x0c

	# GFX11: image_sample_c_d_cl v255, v[240:255], s[8:15], s[12:15] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY d16 ; encoding: [0x14,0x04,0x12,0xf1,0xf0,0xff,0x02,0x0c]			# GFX11: image_sample_c_d_cl v255, v[240:248], s[8:15], s[12:15] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY d16 ; encoding: [0x14,0x04,0x12,0xf1,0xf0,0xff,0x02,0x0c]
	0x14,0x04,0x12,0xf1,0xf0,0xff,0x02,0x0c			0x14,0x04,0x12,0xf1,0xf0,0xff,0x02,0x0c

	# GFX11: image_sample_c_d_cl v[5:6], v[1:7], s[96:103], s[100:103] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY a16 tfe d16 ; encoding: [0x14,0x04,0x13,0xf1,0x01,0x05,0x38,0x64]			# GFX11: image_sample_c_d_cl v[5:6], v[1:7], s[96:103], s[100:103] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY a16 tfe d16 ; encoding: [0x14,0x04,0x13,0xf1,0x01,0x05,0x38,0x64]
	0x14,0x04,0x13,0xf1,0x01,0x05,0x38,0x64			0x14,0x04,0x13,0xf1,0x01,0x05,0x38,0x64

	# GFX11: image_sample_c_d_cl v[254:255], v[249:255], ttmp[8:15], ttmp[12:15] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY unorm glc slc dlc a16 tfe lwe d16 ; encoding: [0x94,0x74,0x13,0xf1,0xf9,0xfe,0x7d,0x78]			# GFX11: image_sample_c_d_cl v[254:255], v[249:255], ttmp[8:15], ttmp[12:15] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY unorm glc slc dlc a16 tfe lwe d16 ; encoding: [0x94,0x74,0x13,0xf1,0xf9,0xfe,0x7d,0x78]
	0x94,0x74,0x13,0xf1,0xf9,0xfe,0x7d,0x78			0x94,0x74,0x13,0xf1,0xf9,0xfe,0x7d,0x78

	# GFX11: image_sample_c_d_cl_g16 v[5:6], v[1:5], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_1D ; encoding: [0x00,0x03,0x50,0xf1,0x01,0x05,0x02,0x0c]			# GFX11: image_sample_c_d_cl_g16 v[5:6], v[1:5], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_1D ; encoding: [0x00,0x03,0x50,0xf1,0x01,0x05,0x02,0x0c]
	0x00,0x03,0x50,0xf1,0x01,0x05,0x02,0x0c			0x00,0x03,0x50,0xf1,0x01,0x05,0x02,0x0c

	# GFX11: image_sample_c_d_cl_g16 v[5:6], v[251:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_1D ; encoding: [0x00,0x03,0x50,0xf1,0xfb,0x05,0x02,0x0c]			# GFX11: image_sample_c_d_cl_g16 v[5:6], v[251:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_1D ; encoding: [0x00,0x03,0x50,0xf1,0xfb,0x05,0x02,0x0c]
	0x00,0x03,0x50,0xf1,0xfb,0x05,0x02,0x0c			0x00,0x03,0x50,0xf1,0xfb,0x05,0x02,0x0c

	# GFX11: image_sample_c_d_cl_g16 v[5:6], v[1:4], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_1D a16 ; encoding: [0x00,0x03,0x51,0xf1,0x01,0x05,0x02,0x0c]			# GFX11: image_sample_c_d_cl_g16 v[5:6], v[1:4], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_1D a16 ; encoding: [0x00,0x03,0x51,0xf1,0x01,0x05,0x02,0x0c]
	0x00,0x03,0x51,0xf1,0x01,0x05,0x02,0x0c			0x00,0x03,0x51,0xf1,0x01,0x05,0x02,0x0c

	# GFX11: image_sample_c_d_cl_g16 v[5:6], v[252:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_1D a16 ; encoding: [0x00,0x03,0x51,0xf1,0xfc,0x05,0x02,0x0c]			# GFX11: image_sample_c_d_cl_g16 v[5:6], v[252:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_1D a16 ; encoding: [0x00,0x03,0x51,0xf1,0xfc,0x05,0x02,0x0c]
	0x00,0x03,0x51,0xf1,0xfc,0x05,0x02,0x0c			0x00,0x03,0x51,0xf1,0xfc,0x05,0x02,0x0c

	# GFX11: image_sample_c_d_cl_g16 v[5:6], v[1:16], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D ; encoding: [0x08,0x03,0x50,0xf1,0x01,0x05,0x02,0x0c]			# GFX11: image_sample_c_d_cl_g16 v[5:6], v[1:9], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D ; encoding: [0x08,0x03,0x50,0xf1,0x01,0x05,0x02,0x0c]
	0x08,0x03,0x50,0xf1,0x01,0x05,0x02,0x0c			0x08,0x03,0x50,0xf1,0x01,0x05,0x02,0x0c

	# GFX11: image_sample_c_d_cl_g16 v[5:6], v[240:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D ; encoding: [0x08,0x03,0x50,0xf1,0xf0,0x05,0x02,0x0c]			# GFX11: image_sample_c_d_cl_g16 v[5:6], v[240:248], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D ; encoding: [0x08,0x03,0x50,0xf1,0xf0,0x05,0x02,0x0c]
	0x08,0x03,0x50,0xf1,0xf0,0x05,0x02,0x0c			0x08,0x03,0x50,0xf1,0xf0,0x05,0x02,0x0c

	# GFX11: image_sample_c_d_cl_g16 v[5:6], v[1:7], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D a16 ; encoding: [0x08,0x03,0x51,0xf1,0x01,0x05,0x02,0x0c]			# GFX11: image_sample_c_d_cl_g16 v[5:6], v[1:7], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D a16 ; encoding: [0x08,0x03,0x51,0xf1,0x01,0x05,0x02,0x0c]
	0x08,0x03,0x51,0xf1,0x01,0x05,0x02,0x0c			0x08,0x03,0x51,0xf1,0x01,0x05,0x02,0x0c

	# GFX11: image_sample_c_d_cl_g16 v[5:6], v[249:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D a16 ; encoding: [0x08,0x03,0x51,0xf1,0xf9,0x05,0x02,0x0c]			# GFX11: image_sample_c_d_cl_g16 v[5:6], v[249:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D a16 ; encoding: [0x08,0x03,0x51,0xf1,0xf9,0x05,0x02,0x0c]
	0x08,0x03,0x51,0xf1,0xf9,0x05,0x02,0x0c			0x08,0x03,0x51,0xf1,0xf9,0x05,0x02,0x0c

	▲ Show 20 Lines • Show All 52 Lines • ▼ Show 20 Lines
	0x00,0x03,0x28,0xf1,0xfa,0x05,0x02,0x0c			0x00,0x03,0x28,0xf1,0xfa,0x05,0x02,0x0c

	# GFX11: image_sample_c_d_cl_o v[5:6], v[1:5], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_1D a16 ; encoding: [0x00,0x03,0x29,0xf1,0x01,0x05,0x02,0x0c]			# GFX11: image_sample_c_d_cl_o v[5:6], v[1:5], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_1D a16 ; encoding: [0x00,0x03,0x29,0xf1,0x01,0x05,0x02,0x0c]
	0x00,0x03,0x29,0xf1,0x01,0x05,0x02,0x0c			0x00,0x03,0x29,0xf1,0x01,0x05,0x02,0x0c

	# GFX11: image_sample_c_d_cl_o v[5:6], v[251:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_1D a16 ; encoding: [0x00,0x03,0x29,0xf1,0xfb,0x05,0x02,0x0c]			# GFX11: image_sample_c_d_cl_o v[5:6], v[251:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_1D a16 ; encoding: [0x00,0x03,0x29,0xf1,0xfb,0x05,0x02,0x0c]
	0x00,0x03,0x29,0xf1,0xfb,0x05,0x02,0x0c			0x00,0x03,0x29,0xf1,0xfb,0x05,0x02,0x0c

	# GFX11: image_sample_c_d_cl_o v[5:6], v[1:16], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D ; encoding: [0x08,0x03,0x28,0xf1,0x01,0x05,0x02,0x0c]			# GFX11: image_sample_c_d_cl_o v[5:6], v[1:12], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D ; encoding: [0x08,0x03,0x28,0xf1,0x01,0x05,0x02,0x0c]
	0x08,0x03,0x28,0xf1,0x01,0x05,0x02,0x0c			0x08,0x03,0x28,0xf1,0x01,0x05,0x02,0x0c

	# GFX11: image_sample_c_d_cl_o v[5:6], v[240:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D ; encoding: [0x08,0x03,0x28,0xf1,0xf0,0x05,0x02,0x0c]			# GFX11: image_sample_c_d_cl_o v[5:6], v[240:251], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D ; encoding: [0x08,0x03,0x28,0xf1,0xf0,0x05,0x02,0x0c]
	0x08,0x03,0x28,0xf1,0xf0,0x05,0x02,0x0c			0x08,0x03,0x28,0xf1,0xf0,0x05,0x02,0x0c

	# GFX11: image_sample_c_d_cl_o v[5:6], v[1:16], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D a16 ; encoding: [0x08,0x03,0x29,0xf1,0x01,0x05,0x02,0x0c]			# GFX11: image_sample_c_d_cl_o v[5:6], v[1:10], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D a16 ; encoding: [0x08,0x03,0x29,0xf1,0x01,0x05,0x02,0x0c]
	0x08,0x03,0x29,0xf1,0x01,0x05,0x02,0x0c			0x08,0x03,0x29,0xf1,0x01,0x05,0x02,0x0c

	# GFX11: image_sample_c_d_cl_o v[5:6], v[240:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D a16 ; encoding: [0x08,0x03,0x29,0xf1,0xf0,0x05,0x02,0x0c]			# GFX11: image_sample_c_d_cl_o v[5:6], v[240:249], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D a16 ; encoding: [0x08,0x03,0x29,0xf1,0xf0,0x05,0x02,0x0c]
	0x08,0x03,0x29,0xf1,0xf0,0x05,0x02,0x0c			0x08,0x03,0x29,0xf1,0xf0,0x05,0x02,0x0c

	# GFX11: image_sample_c_d_cl_o v[5:6], v[1:16], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_2D ; encoding: [0x04,0x03,0x28,0xf1,0x01,0x05,0x02,0x0c]			# GFX11: image_sample_c_d_cl_o v[5:6], v[1:9], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_2D ; encoding: [0x04,0x03,0x28,0xf1,0x01,0x05,0x02,0x0c]
	0x04,0x03,0x28,0xf1,0x01,0x05,0x02,0x0c			0x04,0x03,0x28,0xf1,0x01,0x05,0x02,0x0c

	# GFX11: image_sample_c_d_cl_o v[254:255], v[240:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_2D ; encoding: [0x04,0x03,0x28,0xf1,0xf0,0xfe,0x02,0x0c]			# GFX11: image_sample_c_d_cl_o v[254:255], v[240:248], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_2D ; encoding: [0x04,0x03,0x28,0xf1,0xf0,0xfe,0x02,0x0c]
	0x04,0x03,0x28,0xf1,0xf0,0xfe,0x02,0x0c			0x04,0x03,0x28,0xf1,0xf0,0xfe,0x02,0x0c

	# GFX11: image_sample_c_d_cl_o v[5:7], v[1:8], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_2D a16 tfe ; encoding: [0x04,0x03,0x29,0xf1,0x01,0x05,0x22,0x0c]			# GFX11: image_sample_c_d_cl_o v[5:7], v[1:8], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_2D a16 tfe ; encoding: [0x04,0x03,0x29,0xf1,0x01,0x05,0x22,0x0c]
	0x04,0x03,0x29,0xf1,0x01,0x05,0x22,0x0c			0x04,0x03,0x29,0xf1,0x01,0x05,0x22,0x0c

	# GFX11: image_sample_c_d_cl_o v[253:255], v[248:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_2D a16 tfe ; encoding: [0x04,0x03,0x29,0xf1,0xf8,0xfd,0x22,0x0c]			# GFX11: image_sample_c_d_cl_o v[253:255], v[248:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_2D a16 tfe ; encoding: [0x04,0x03,0x29,0xf1,0xf8,0xfd,0x22,0x0c]
	0x04,0x03,0x29,0xf1,0xf8,0xfd,0x22,0x0c			0x04,0x03,0x29,0xf1,0xf8,0xfd,0x22,0x0c

	# GFX11: image_sample_c_d_cl_o v5, v[1:16], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_CUBE d16 ; encoding: [0x0c,0x03,0x2a,0xf1,0x01,0x05,0x02,0x0c]			# GFX11: image_sample_c_d_cl_o v5, v[1:10], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_CUBE d16 ; encoding: [0x0c,0x03,0x2a,0xf1,0x01,0x05,0x02,0x0c]
	0x0c,0x03,0x2a,0xf1,0x01,0x05,0x02,0x0c			0x0c,0x03,0x2a,0xf1,0x01,0x05,0x02,0x0c

	# GFX11: image_sample_c_d_cl_o v255, v[240:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_CUBE d16 ; encoding: [0x0c,0x03,0x2a,0xf1,0xf0,0xff,0x02,0x0c]			# GFX11: image_sample_c_d_cl_o v255, v[240:249], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_CUBE d16 ; encoding: [0x0c,0x03,0x2a,0xf1,0xf0,0xff,0x02,0x0c]
	0x0c,0x03,0x2a,0xf1,0xf0,0xff,0x02,0x0c			0x0c,0x03,0x2a,0xf1,0xf0,0xff,0x02,0x0c

	# GFX11: image_sample_c_d_cl_o v[5:6], v[1:8], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_CUBE a16 tfe d16 ; encoding: [0x0c,0x03,0x2b,0xf1,0x01,0x05,0x22,0x0c]			# GFX11: image_sample_c_d_cl_o v[5:6], v[1:8], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_CUBE a16 tfe d16 ; encoding: [0x0c,0x03,0x2b,0xf1,0x01,0x05,0x22,0x0c]
	0x0c,0x03,0x2b,0xf1,0x01,0x05,0x22,0x0c			0x0c,0x03,0x2b,0xf1,0x01,0x05,0x22,0x0c

	# GFX11: image_sample_c_d_cl_o v[254:255], v[248:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_CUBE a16 tfe d16 ; encoding: [0x0c,0x03,0x2b,0xf1,0xf8,0xfe,0x22,0x0c]			# GFX11: image_sample_c_d_cl_o v[254:255], v[248:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_CUBE a16 tfe d16 ; encoding: [0x0c,0x03,0x2b,0xf1,0xf8,0xfe,0x22,0x0c]
	0x0c,0x03,0x2b,0xf1,0xf8,0xfe,0x22,0x0c			0x0c,0x03,0x2b,0xf1,0xf8,0xfe,0x22,0x0c

	# GFX11: image_sample_c_d_cl_o v5, v[1:7], s[8:15], s[12:15] dmask:0x4 dim:SQ_RSRC_IMG_1D_ARRAY ; encoding: [0x10,0x04,0x28,0xf1,0x01,0x05,0x02,0x0c]			# GFX11: image_sample_c_d_cl_o v5, v[1:7], s[8:15], s[12:15] dmask:0x4 dim:SQ_RSRC_IMG_1D_ARRAY ; encoding: [0x10,0x04,0x28,0xf1,0x01,0x05,0x02,0x0c]
	0x10,0x04,0x28,0xf1,0x01,0x05,0x02,0x0c			0x10,0x04,0x28,0xf1,0x01,0x05,0x02,0x0c

	# GFX11: image_sample_c_d_cl_o v255, v[249:255], s[8:15], s[12:15] dmask:0x4 dim:SQ_RSRC_IMG_1D_ARRAY ; encoding: [0x10,0x04,0x28,0xf1,0xf9,0xff,0x02,0x0c]			# GFX11: image_sample_c_d_cl_o v255, v[249:255], s[8:15], s[12:15] dmask:0x4 dim:SQ_RSRC_IMG_1D_ARRAY ; encoding: [0x10,0x04,0x28,0xf1,0xf9,0xff,0x02,0x0c]
	0x10,0x04,0x28,0xf1,0xf9,0xff,0x02,0x0c			0x10,0x04,0x28,0xf1,0xf9,0xff,0x02,0x0c

	# GFX11: image_sample_c_d_cl_o v[5:6], v[1:6], s[8:15], s[12:15] dmask:0x4 dim:SQ_RSRC_IMG_1D_ARRAY a16 tfe ; encoding: [0x10,0x04,0x29,0xf1,0x01,0x05,0x22,0x0c]			# GFX11: image_sample_c_d_cl_o v[5:6], v[1:6], s[8:15], s[12:15] dmask:0x4 dim:SQ_RSRC_IMG_1D_ARRAY a16 tfe ; encoding: [0x10,0x04,0x29,0xf1,0x01,0x05,0x22,0x0c]
	0x10,0x04,0x29,0xf1,0x01,0x05,0x22,0x0c			0x10,0x04,0x29,0xf1,0x01,0x05,0x22,0x0c

	# GFX11: image_sample_c_d_cl_o v[254:255], v[250:255], s[8:15], s[12:15] dmask:0x4 dim:SQ_RSRC_IMG_1D_ARRAY a16 tfe ; encoding: [0x10,0x04,0x29,0xf1,0xfa,0xfe,0x22,0x0c]			# GFX11: image_sample_c_d_cl_o v[254:255], v[250:255], s[8:15], s[12:15] dmask:0x4 dim:SQ_RSRC_IMG_1D_ARRAY a16 tfe ; encoding: [0x10,0x04,0x29,0xf1,0xfa,0xfe,0x22,0x0c]
	0x10,0x04,0x29,0xf1,0xfa,0xfe,0x22,0x0c			0x10,0x04,0x29,0xf1,0xfa,0xfe,0x22,0x0c

	# GFX11: image_sample_c_d_cl_o v5, v[1:16], s[8:15], s[12:15] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY d16 ; encoding: [0x14,0x04,0x2a,0xf1,0x01,0x05,0x02,0x0c]			# GFX11: image_sample_c_d_cl_o v5, v[1:10], s[8:15], s[12:15] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY d16 ; encoding: [0x14,0x04,0x2a,0xf1,0x01,0x05,0x02,0x0c]
	0x14,0x04,0x2a,0xf1,0x01,0x05,0x02,0x0c			0x14,0x04,0x2a,0xf1,0x01,0x05,0x02,0x0c

	# GFX11: image_sample_c_d_cl_o v255, v[240:255], s[8:15], s[12:15] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY d16 ; encoding: [0x14,0x04,0x2a,0xf1,0xf0,0xff,0x02,0x0c]			# GFX11: image_sample_c_d_cl_o v255, v[240:249], s[8:15], s[12:15] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY d16 ; encoding: [0x14,0x04,0x2a,0xf1,0xf0,0xff,0x02,0x0c]
	0x14,0x04,0x2a,0xf1,0xf0,0xff,0x02,0x0c			0x14,0x04,0x2a,0xf1,0xf0,0xff,0x02,0x0c

	# GFX11: image_sample_c_d_cl_o v[5:6], v[1:8], s[96:103], s[100:103] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY a16 tfe d16 ; encoding: [0x14,0x04,0x2b,0xf1,0x01,0x05,0x38,0x64]			# GFX11: image_sample_c_d_cl_o v[5:6], v[1:8], s[96:103], s[100:103] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY a16 tfe d16 ; encoding: [0x14,0x04,0x2b,0xf1,0x01,0x05,0x38,0x64]
	0x14,0x04,0x2b,0xf1,0x01,0x05,0x38,0x64			0x14,0x04,0x2b,0xf1,0x01,0x05,0x38,0x64

	# GFX11: image_sample_c_d_cl_o v[254:255], v[248:255], ttmp[8:15], ttmp[12:15] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY unorm glc slc dlc a16 tfe lwe d16 ; encoding: [0x94,0x74,0x2b,0xf1,0xf8,0xfe,0x7d,0x78]			# GFX11: image_sample_c_d_cl_o v[254:255], v[248:255], ttmp[8:15], ttmp[12:15] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY unorm glc slc dlc a16 tfe lwe d16 ; encoding: [0x94,0x74,0x2b,0xf1,0xf8,0xfe,0x7d,0x78]
	0x94,0x74,0x2b,0xf1,0xf8,0xfe,0x7d,0x78			0x94,0x74,0x2b,0xf1,0xf8,0xfe,0x7d,0x78

	# GFX11: image_sample_c_d_cl_o_g16 v[5:6], v[1:6], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_1D ; encoding: [0x00,0x03,0x58,0xf1,0x01,0x05,0x02,0x0c]			# GFX11: image_sample_c_d_cl_o_g16 v[5:6], v[1:6], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_1D ; encoding: [0x00,0x03,0x58,0xf1,0x01,0x05,0x02,0x0c]
	0x00,0x03,0x58,0xf1,0x01,0x05,0x02,0x0c			0x00,0x03,0x58,0xf1,0x01,0x05,0x02,0x0c

	# GFX11: image_sample_c_d_cl_o_g16 v[5:6], v[250:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_1D ; encoding: [0x00,0x03,0x58,0xf1,0xfa,0x05,0x02,0x0c]			# GFX11: image_sample_c_d_cl_o_g16 v[5:6], v[250:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_1D ; encoding: [0x00,0x03,0x58,0xf1,0xfa,0x05,0x02,0x0c]
	0x00,0x03,0x58,0xf1,0xfa,0x05,0x02,0x0c			0x00,0x03,0x58,0xf1,0xfa,0x05,0x02,0x0c

	# GFX11: image_sample_c_d_cl_o_g16 v[5:6], v[1:5], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_1D a16 ; encoding: [0x00,0x03,0x59,0xf1,0x01,0x05,0x02,0x0c]			# GFX11: image_sample_c_d_cl_o_g16 v[5:6], v[1:5], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_1D a16 ; encoding: [0x00,0x03,0x59,0xf1,0x01,0x05,0x02,0x0c]
	0x00,0x03,0x59,0xf1,0x01,0x05,0x02,0x0c			0x00,0x03,0x59,0xf1,0x01,0x05,0x02,0x0c

	# GFX11: image_sample_c_d_cl_o_g16 v[5:6], v[251:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_1D a16 ; encoding: [0x00,0x03,0x59,0xf1,0xfb,0x05,0x02,0x0c]			# GFX11: image_sample_c_d_cl_o_g16 v[5:6], v[251:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_1D a16 ; encoding: [0x00,0x03,0x59,0xf1,0xfb,0x05,0x02,0x0c]
	0x00,0x03,0x59,0xf1,0xfb,0x05,0x02,0x0c			0x00,0x03,0x59,0xf1,0xfb,0x05,0x02,0x0c

	# GFX11: image_sample_c_d_cl_o_g16 v[5:6], v[1:16], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D ; encoding: [0x08,0x03,0x58,0xf1,0x01,0x05,0x02,0x0c]			# GFX11: image_sample_c_d_cl_o_g16 v[5:6], v[1:10], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D ; encoding: [0x08,0x03,0x58,0xf1,0x01,0x05,0x02,0x0c]
	0x08,0x03,0x58,0xf1,0x01,0x05,0x02,0x0c			0x08,0x03,0x58,0xf1,0x01,0x05,0x02,0x0c

	# GFX11: image_sample_c_d_cl_o_g16 v[5:6], v[240:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D ; encoding: [0x08,0x03,0x58,0xf1,0xf0,0x05,0x02,0x0c]			# GFX11: image_sample_c_d_cl_o_g16 v[5:6], v[240:249], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D ; encoding: [0x08,0x03,0x58,0xf1,0xf0,0x05,0x02,0x0c]
	0x08,0x03,0x58,0xf1,0xf0,0x05,0x02,0x0c			0x08,0x03,0x58,0xf1,0xf0,0x05,0x02,0x0c

	# GFX11: image_sample_c_d_cl_o_g16 v[5:6], v[1:8], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D a16 ; encoding: [0x08,0x03,0x59,0xf1,0x01,0x05,0x02,0x0c]			# GFX11: image_sample_c_d_cl_o_g16 v[5:6], v[1:8], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D a16 ; encoding: [0x08,0x03,0x59,0xf1,0x01,0x05,0x02,0x0c]
	0x08,0x03,0x59,0xf1,0x01,0x05,0x02,0x0c			0x08,0x03,0x59,0xf1,0x01,0x05,0x02,0x0c

	# GFX11: image_sample_c_d_cl_o_g16 v[5:6], v[248:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D a16 ; encoding: [0x08,0x03,0x59,0xf1,0xf8,0x05,0x02,0x0c]			# GFX11: image_sample_c_d_cl_o_g16 v[5:6], v[248:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D a16 ; encoding: [0x08,0x03,0x59,0xf1,0xf8,0x05,0x02,0x0c]
	0x08,0x03,0x59,0xf1,0xf8,0x05,0x02,0x0c			0x08,0x03,0x59,0xf1,0xf8,0x05,0x02,0x0c

	▲ Show 20 Lines • Show All 124 Lines • ▼ Show 20 Lines
	0x00,0x03,0xac,0xf0,0xfb,0x05,0x02,0x0c			0x00,0x03,0xac,0xf0,0xfb,0x05,0x02,0x0c

	# GFX11: image_sample_c_d_o v[5:6], v[1:5], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_1D a16 ; encoding: [0x00,0x03,0xad,0xf0,0x01,0x05,0x02,0x0c]			# GFX11: image_sample_c_d_o v[5:6], v[1:5], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_1D a16 ; encoding: [0x00,0x03,0xad,0xf0,0x01,0x05,0x02,0x0c]
	0x00,0x03,0xad,0xf0,0x01,0x05,0x02,0x0c			0x00,0x03,0xad,0xf0,0x01,0x05,0x02,0x0c

	# GFX11: image_sample_c_d_o v[5:6], v[251:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_1D a16 ; encoding: [0x00,0x03,0xad,0xf0,0xfb,0x05,0x02,0x0c]			# GFX11: image_sample_c_d_o v[5:6], v[251:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_1D a16 ; encoding: [0x00,0x03,0xad,0xf0,0xfb,0x05,0x02,0x0c]
	0x00,0x03,0xad,0xf0,0xfb,0x05,0x02,0x0c			0x00,0x03,0xad,0xf0,0xfb,0x05,0x02,0x0c

	# GFX11: image_sample_c_d_o v[5:6], v[1:16], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D ; encoding: [0x08,0x03,0xac,0xf0,0x01,0x05,0x02,0x0c]			# GFX11: image_sample_c_d_o v[5:6], v[1:11], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D ; encoding: [0x08,0x03,0xac,0xf0,0x01,0x05,0x02,0x0c]
	0x08,0x03,0xac,0xf0,0x01,0x05,0x02,0x0c			0x08,0x03,0xac,0xf0,0x01,0x05,0x02,0x0c

	# GFX11: image_sample_c_d_o v[5:6], v[240:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D ; encoding: [0x08,0x03,0xac,0xf0,0xf0,0x05,0x02,0x0c]			# GFX11: image_sample_c_d_o v[5:6], v[240:250], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D ; encoding: [0x08,0x03,0xac,0xf0,0xf0,0x05,0x02,0x0c]
	0x08,0x03,0xac,0xf0,0xf0,0x05,0x02,0x0c			0x08,0x03,0xac,0xf0,0xf0,0x05,0x02,0x0c

	# GFX11: image_sample_c_d_o v[5:6], v[1:16], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D a16 ; encoding: [0x08,0x03,0xad,0xf0,0x01,0x05,0x02,0x0c]			# GFX11: image_sample_c_d_o v[5:6], v[1:10], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D a16 ; encoding: [0x08,0x03,0xad,0xf0,0x01,0x05,0x02,0x0c]
	0x08,0x03,0xad,0xf0,0x01,0x05,0x02,0x0c			0x08,0x03,0xad,0xf0,0x01,0x05,0x02,0x0c

	# GFX11: image_sample_c_d_o v[5:6], v[240:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D a16 ; encoding: [0x08,0x03,0xad,0xf0,0xf0,0x05,0x02,0x0c]			# GFX11: image_sample_c_d_o v[5:6], v[240:249], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D a16 ; encoding: [0x08,0x03,0xad,0xf0,0xf0,0x05,0x02,0x0c]
	0x08,0x03,0xad,0xf0,0xf0,0x05,0x02,0x0c			0x08,0x03,0xad,0xf0,0xf0,0x05,0x02,0x0c

	# GFX11: image_sample_c_d_o v[5:6], v[1:8], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_2D ; encoding: [0x04,0x03,0xac,0xf0,0x01,0x05,0x02,0x0c]			# GFX11: image_sample_c_d_o v[5:6], v[1:8], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_2D ; encoding: [0x04,0x03,0xac,0xf0,0x01,0x05,0x02,0x0c]
	0x04,0x03,0xac,0xf0,0x01,0x05,0x02,0x0c			0x04,0x03,0xac,0xf0,0x01,0x05,0x02,0x0c

	# GFX11: image_sample_c_d_o v[254:255], v[248:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_2D ; encoding: [0x04,0x03,0xac,0xf0,0xf8,0xfe,0x02,0x0c]			# GFX11: image_sample_c_d_o v[254:255], v[248:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_2D ; encoding: [0x04,0x03,0xac,0xf0,0xf8,0xfe,0x02,0x0c]
	0x04,0x03,0xac,0xf0,0xf8,0xfe,0x02,0x0c			0x04,0x03,0xac,0xf0,0xf8,0xfe,0x02,0x0c

	# GFX11: image_sample_c_d_o v[5:7], v[1:7], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_2D a16 tfe ; encoding: [0x04,0x03,0xad,0xf0,0x01,0x05,0x22,0x0c]			# GFX11: image_sample_c_d_o v[5:7], v[1:7], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_2D a16 tfe ; encoding: [0x04,0x03,0xad,0xf0,0x01,0x05,0x22,0x0c]
	0x04,0x03,0xad,0xf0,0x01,0x05,0x22,0x0c			0x04,0x03,0xad,0xf0,0x01,0x05,0x22,0x0c

	# GFX11: image_sample_c_d_o v[253:255], v[249:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_2D a16 tfe ; encoding: [0x04,0x03,0xad,0xf0,0xf9,0xfd,0x22,0x0c]			# GFX11: image_sample_c_d_o v[253:255], v[249:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_2D a16 tfe ; encoding: [0x04,0x03,0xad,0xf0,0xf9,0xfd,0x22,0x0c]
	0x04,0x03,0xad,0xf0,0xf9,0xfd,0x22,0x0c			0x04,0x03,0xad,0xf0,0xf9,0xfd,0x22,0x0c

	# GFX11: image_sample_c_d_o v5, v[1:16], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_CUBE d16 ; encoding: [0x0c,0x03,0xae,0xf0,0x01,0x05,0x02,0x0c]			# GFX11: image_sample_c_d_o v5, v[1:9], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_CUBE d16 ; encoding: [0x0c,0x03,0xae,0xf0,0x01,0x05,0x02,0x0c]
	0x0c,0x03,0xae,0xf0,0x01,0x05,0x02,0x0c			0x0c,0x03,0xae,0xf0,0x01,0x05,0x02,0x0c

	# GFX11: image_sample_c_d_o v255, v[240:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_CUBE d16 ; encoding: [0x0c,0x03,0xae,0xf0,0xf0,0xff,0x02,0x0c]			# GFX11: image_sample_c_d_o v255, v[240:248], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_CUBE d16 ; encoding: [0x0c,0x03,0xae,0xf0,0xf0,0xff,0x02,0x0c]
	0x0c,0x03,0xae,0xf0,0xf0,0xff,0x02,0x0c			0x0c,0x03,0xae,0xf0,0xf0,0xff,0x02,0x0c

	# GFX11: image_sample_c_d_o v[5:6], v[1:8], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_CUBE a16 tfe d16 ; encoding: [0x0c,0x03,0xaf,0xf0,0x01,0x05,0x22,0x0c]			# GFX11: image_sample_c_d_o v[5:6], v[1:8], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_CUBE a16 tfe d16 ; encoding: [0x0c,0x03,0xaf,0xf0,0x01,0x05,0x22,0x0c]
	0x0c,0x03,0xaf,0xf0,0x01,0x05,0x22,0x0c			0x0c,0x03,0xaf,0xf0,0x01,0x05,0x22,0x0c

	# GFX11: image_sample_c_d_o v[254:255], v[248:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_CUBE a16 tfe d16 ; encoding: [0x0c,0x03,0xaf,0xf0,0xf8,0xfe,0x22,0x0c]			# GFX11: image_sample_c_d_o v[254:255], v[248:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_CUBE a16 tfe d16 ; encoding: [0x0c,0x03,0xaf,0xf0,0xf8,0xfe,0x22,0x0c]
	0x0c,0x03,0xaf,0xf0,0xf8,0xfe,0x22,0x0c			0x0c,0x03,0xaf,0xf0,0xf8,0xfe,0x22,0x0c

	# GFX11: image_sample_c_d_o v5, v[1:6], s[8:15], s[12:15] dmask:0x4 dim:SQ_RSRC_IMG_1D_ARRAY ; encoding: [0x10,0x04,0xac,0xf0,0x01,0x05,0x02,0x0c]			# GFX11: image_sample_c_d_o v5, v[1:6], s[8:15], s[12:15] dmask:0x4 dim:SQ_RSRC_IMG_1D_ARRAY ; encoding: [0x10,0x04,0xac,0xf0,0x01,0x05,0x02,0x0c]
	0x10,0x04,0xac,0xf0,0x01,0x05,0x02,0x0c			0x10,0x04,0xac,0xf0,0x01,0x05,0x02,0x0c

	# GFX11: image_sample_c_d_o v255, v[250:255], s[8:15], s[12:15] dmask:0x4 dim:SQ_RSRC_IMG_1D_ARRAY ; encoding: [0x10,0x04,0xac,0xf0,0xfa,0xff,0x02,0x0c]			# GFX11: image_sample_c_d_o v255, v[250:255], s[8:15], s[12:15] dmask:0x4 dim:SQ_RSRC_IMG_1D_ARRAY ; encoding: [0x10,0x04,0xac,0xf0,0xfa,0xff,0x02,0x0c]
	0x10,0x04,0xac,0xf0,0xfa,0xff,0x02,0x0c			0x10,0x04,0xac,0xf0,0xfa,0xff,0x02,0x0c

	# GFX11: image_sample_c_d_o v[5:6], v[1:5], s[8:15], s[12:15] dmask:0x4 dim:SQ_RSRC_IMG_1D_ARRAY a16 tfe ; encoding: [0x10,0x04,0xad,0xf0,0x01,0x05,0x22,0x0c]			# GFX11: image_sample_c_d_o v[5:6], v[1:5], s[8:15], s[12:15] dmask:0x4 dim:SQ_RSRC_IMG_1D_ARRAY a16 tfe ; encoding: [0x10,0x04,0xad,0xf0,0x01,0x05,0x22,0x0c]
	0x10,0x04,0xad,0xf0,0x01,0x05,0x22,0x0c			0x10,0x04,0xad,0xf0,0x01,0x05,0x22,0x0c

	# GFX11: image_sample_c_d_o v[254:255], v[251:255], s[8:15], s[12:15] dmask:0x4 dim:SQ_RSRC_IMG_1D_ARRAY a16 tfe ; encoding: [0x10,0x04,0xad,0xf0,0xfb,0xfe,0x22,0x0c]			# GFX11: image_sample_c_d_o v[254:255], v[251:255], s[8:15], s[12:15] dmask:0x4 dim:SQ_RSRC_IMG_1D_ARRAY a16 tfe ; encoding: [0x10,0x04,0xad,0xf0,0xfb,0xfe,0x22,0x0c]
	0x10,0x04,0xad,0xf0,0xfb,0xfe,0x22,0x0c			0x10,0x04,0xad,0xf0,0xfb,0xfe,0x22,0x0c

	# GFX11: image_sample_c_d_o v5, v[1:16], s[8:15], s[12:15] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY d16 ; encoding: [0x14,0x04,0xae,0xf0,0x01,0x05,0x02,0x0c]			# GFX11: image_sample_c_d_o v5, v[1:9], s[8:15], s[12:15] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY d16 ; encoding: [0x14,0x04,0xae,0xf0,0x01,0x05,0x02,0x0c]
	0x14,0x04,0xae,0xf0,0x01,0x05,0x02,0x0c			0x14,0x04,0xae,0xf0,0x01,0x05,0x02,0x0c

	# GFX11: image_sample_c_d_o v255, v[240:255], s[8:15], s[12:15] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY d16 ; encoding: [0x14,0x04,0xae,0xf0,0xf0,0xff,0x02,0x0c]			# GFX11: image_sample_c_d_o v255, v[240:248], s[8:15], s[12:15] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY d16 ; encoding: [0x14,0x04,0xae,0xf0,0xf0,0xff,0x02,0x0c]
	0x14,0x04,0xae,0xf0,0xf0,0xff,0x02,0x0c			0x14,0x04,0xae,0xf0,0xf0,0xff,0x02,0x0c

	# GFX11: image_sample_c_d_o v[5:6], v[1:8], s[96:103], s[100:103] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY a16 tfe d16 ; encoding: [0x14,0x04,0xaf,0xf0,0x01,0x05,0x38,0x64]			# GFX11: image_sample_c_d_o v[5:6], v[1:8], s[96:103], s[100:103] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY a16 tfe d16 ; encoding: [0x14,0x04,0xaf,0xf0,0x01,0x05,0x38,0x64]
	0x14,0x04,0xaf,0xf0,0x01,0x05,0x38,0x64			0x14,0x04,0xaf,0xf0,0x01,0x05,0x38,0x64

	# GFX11: image_sample_c_d_o v[254:255], v[248:255], ttmp[8:15], ttmp[12:15] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY unorm glc slc dlc a16 tfe lwe d16 ; encoding: [0x94,0x74,0xaf,0xf0,0xf8,0xfe,0x7d,0x78]			# GFX11: image_sample_c_d_o v[254:255], v[248:255], ttmp[8:15], ttmp[12:15] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY unorm glc slc dlc a16 tfe lwe d16 ; encoding: [0x94,0x74,0xaf,0xf0,0xf8,0xfe,0x7d,0x78]
	0x94,0x74,0xaf,0xf0,0xf8,0xfe,0x7d,0x78			0x94,0x74,0xaf,0xf0,0xf8,0xfe,0x7d,0x78

	# GFX11: image_sample_c_d_o_g16 v[5:6], v[1:5], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_1D ; encoding: [0x00,0x03,0xf0,0xf0,0x01,0x05,0x02,0x0c]			# GFX11: image_sample_c_d_o_g16 v[5:6], v[1:5], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_1D ; encoding: [0x00,0x03,0xf0,0xf0,0x01,0x05,0x02,0x0c]
	0x00,0x03,0xf0,0xf0,0x01,0x05,0x02,0x0c			0x00,0x03,0xf0,0xf0,0x01,0x05,0x02,0x0c

	# GFX11: image_sample_c_d_o_g16 v[5:6], v[251:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_1D ; encoding: [0x00,0x03,0xf0,0xf0,0xfb,0x05,0x02,0x0c]			# GFX11: image_sample_c_d_o_g16 v[5:6], v[251:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_1D ; encoding: [0x00,0x03,0xf0,0xf0,0xfb,0x05,0x02,0x0c]
	0x00,0x03,0xf0,0xf0,0xfb,0x05,0x02,0x0c			0x00,0x03,0xf0,0xf0,0xfb,0x05,0x02,0x0c

	# GFX11: image_sample_c_d_o_g16 v[5:6], v[1:5], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_1D a16 ; encoding: [0x00,0x03,0xf1,0xf0,0x01,0x05,0x02,0x0c]			# GFX11: image_sample_c_d_o_g16 v[5:6], v[1:5], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_1D a16 ; encoding: [0x00,0x03,0xf1,0xf0,0x01,0x05,0x02,0x0c]
	0x00,0x03,0xf1,0xf0,0x01,0x05,0x02,0x0c			0x00,0x03,0xf1,0xf0,0x01,0x05,0x02,0x0c

	# GFX11: image_sample_c_d_o_g16 v[5:6], v[251:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_1D a16 ; encoding: [0x00,0x03,0xf1,0xf0,0xfb,0x05,0x02,0x0c]			# GFX11: image_sample_c_d_o_g16 v[5:6], v[251:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_1D a16 ; encoding: [0x00,0x03,0xf1,0xf0,0xfb,0x05,0x02,0x0c]
	0x00,0x03,0xf1,0xf0,0xfb,0x05,0x02,0x0c			0x00,0x03,0xf1,0xf0,0xfb,0x05,0x02,0x0c

	# GFX11: image_sample_c_d_o_g16 v[5:6], v[1:16], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D ; encoding: [0x08,0x03,0xf0,0xf0,0x01,0x05,0x02,0x0c]			# GFX11: image_sample_c_d_o_g16 v[5:6], v[1:9], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D ; encoding: [0x08,0x03,0xf0,0xf0,0x01,0x05,0x02,0x0c]
	0x08,0x03,0xf0,0xf0,0x01,0x05,0x02,0x0c			0x08,0x03,0xf0,0xf0,0x01,0x05,0x02,0x0c

	# GFX11: image_sample_c_d_o_g16 v[5:6], v[240:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D ; encoding: [0x08,0x03,0xf0,0xf0,0xf0,0x05,0x02,0x0c]			# GFX11: image_sample_c_d_o_g16 v[5:6], v[240:248], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D ; encoding: [0x08,0x03,0xf0,0xf0,0xf0,0x05,0x02,0x0c]
	0x08,0x03,0xf0,0xf0,0xf0,0x05,0x02,0x0c			0x08,0x03,0xf0,0xf0,0xf0,0x05,0x02,0x0c

	# GFX11: image_sample_c_d_o_g16 v[5:6], v[1:8], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D a16 ; encoding: [0x08,0x03,0xf1,0xf0,0x01,0x05,0x02,0x0c]			# GFX11: image_sample_c_d_o_g16 v[5:6], v[1:8], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D a16 ; encoding: [0x08,0x03,0xf1,0xf0,0x01,0x05,0x02,0x0c]
	0x08,0x03,0xf1,0xf0,0x01,0x05,0x02,0x0c			0x08,0x03,0xf1,0xf0,0x01,0x05,0x02,0x0c

	# GFX11: image_sample_c_d_o_g16 v[5:6], v[248:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D a16 ; encoding: [0x08,0x03,0xf1,0xf0,0xf8,0x05,0x02,0x0c]			# GFX11: image_sample_c_d_o_g16 v[5:6], v[248:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D a16 ; encoding: [0x08,0x03,0xf1,0xf0,0xf8,0x05,0x02,0x0c]
	0x08,0x03,0xf1,0xf0,0xf8,0x05,0x02,0x0c			0x08,0x03,0xf1,0xf0,0xf8,0x05,0x02,0x0c

	▲ Show 20 Lines • Show All 556 Lines • ▼ Show 20 Lines
	0x00,0x03,0x70,0xf0,0xfd,0x05,0x02,0x0c			0x00,0x03,0x70,0xf0,0xfd,0x05,0x02,0x0c

	# GFX11: image_sample_d v[5:6], v[1:3], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_1D a16 ; encoding: [0x00,0x03,0x71,0xf0,0x01,0x05,0x02,0x0c]			# GFX11: image_sample_d v[5:6], v[1:3], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_1D a16 ; encoding: [0x00,0x03,0x71,0xf0,0x01,0x05,0x02,0x0c]
	0x00,0x03,0x71,0xf0,0x01,0x05,0x02,0x0c			0x00,0x03,0x71,0xf0,0x01,0x05,0x02,0x0c

	# GFX11: image_sample_d v[5:6], v[253:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_1D a16 ; encoding: [0x00,0x03,0x71,0xf0,0xfd,0x05,0x02,0x0c]			# GFX11: image_sample_d v[5:6], v[253:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_1D a16 ; encoding: [0x00,0x03,0x71,0xf0,0xfd,0x05,0x02,0x0c]
	0x00,0x03,0x71,0xf0,0xfd,0x05,0x02,0x0c			0x00,0x03,0x71,0xf0,0xfd,0x05,0x02,0x0c

	# GFX11: image_sample_d v[5:6], v[1:16], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D ; encoding: [0x08,0x03,0x70,0xf0,0x01,0x05,0x02,0x0c]			# GFX11: image_sample_d v[5:6], v[1:9], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D ; encoding: [0x08,0x03,0x70,0xf0,0x01,0x05,0x02,0x0c]
	0x08,0x03,0x70,0xf0,0x01,0x05,0x02,0x0c			0x08,0x03,0x70,0xf0,0x01,0x05,0x02,0x0c

	# GFX11: image_sample_d v[5:6], v[240:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D ; encoding: [0x08,0x03,0x70,0xf0,0xf0,0x05,0x02,0x0c]			# GFX11: image_sample_d v[5:6], v[240:248], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D ; encoding: [0x08,0x03,0x70,0xf0,0xf0,0x05,0x02,0x0c]
	0x08,0x03,0x70,0xf0,0xf0,0x05,0x02,0x0c			0x08,0x03,0x70,0xf0,0xf0,0x05,0x02,0x0c

	# GFX11: image_sample_d v[5:6], v[1:8], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D a16 ; encoding: [0x08,0x03,0x71,0xf0,0x01,0x05,0x02,0x0c]			# GFX11: image_sample_d v[5:6], v[1:8], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D a16 ; encoding: [0x08,0x03,0x71,0xf0,0x01,0x05,0x02,0x0c]
	0x08,0x03,0x71,0xf0,0x01,0x05,0x02,0x0c			0x08,0x03,0x71,0xf0,0x01,0x05,0x02,0x0c

	# GFX11: image_sample_d v[5:6], v[248:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D a16 ; encoding: [0x08,0x03,0x71,0xf0,0xf8,0x05,0x02,0x0c]			# GFX11: image_sample_d v[5:6], v[248:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D a16 ; encoding: [0x08,0x03,0x71,0xf0,0xf8,0x05,0x02,0x0c]
	0x08,0x03,0x71,0xf0,0xf8,0x05,0x02,0x0c			0x08,0x03,0x71,0xf0,0xf8,0x05,0x02,0x0c

	▲ Show 20 Lines • Show All 52 Lines • ▼ Show 20 Lines
	0x00,0x03,0x04,0xf1,0xfc,0x05,0x02,0x0c			0x00,0x03,0x04,0xf1,0xfc,0x05,0x02,0x0c

	# GFX11: image_sample_d_cl v[5:6], v[1:3], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_1D a16 ; encoding: [0x00,0x03,0x05,0xf1,0x01,0x05,0x02,0x0c]			# GFX11: image_sample_d_cl v[5:6], v[1:3], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_1D a16 ; encoding: [0x00,0x03,0x05,0xf1,0x01,0x05,0x02,0x0c]
	0x00,0x03,0x05,0xf1,0x01,0x05,0x02,0x0c			0x00,0x03,0x05,0xf1,0x01,0x05,0x02,0x0c

	# GFX11: image_sample_d_cl v[5:6], v[253:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_1D a16 ; encoding: [0x00,0x03,0x05,0xf1,0xfd,0x05,0x02,0x0c]			# GFX11: image_sample_d_cl v[5:6], v[253:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_1D a16 ; encoding: [0x00,0x03,0x05,0xf1,0xfd,0x05,0x02,0x0c]
	0x00,0x03,0x05,0xf1,0xfd,0x05,0x02,0x0c			0x00,0x03,0x05,0xf1,0xfd,0x05,0x02,0x0c

	# GFX11: image_sample_d_cl v[5:6], v[1:16], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D ; encoding: [0x08,0x03,0x04,0xf1,0x01,0x05,0x02,0x0c]			# GFX11: image_sample_d_cl v[5:6], v[1:10], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D ; encoding: [0x08,0x03,0x04,0xf1,0x01,0x05,0x02,0x0c]
	0x08,0x03,0x04,0xf1,0x01,0x05,0x02,0x0c			0x08,0x03,0x04,0xf1,0x01,0x05,0x02,0x0c

	# GFX11: image_sample_d_cl v[5:6], v[240:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D ; encoding: [0x08,0x03,0x04,0xf1,0xf0,0x05,0x02,0x0c]			# GFX11: image_sample_d_cl v[5:6], v[240:249], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D ; encoding: [0x08,0x03,0x04,0xf1,0xf0,0x05,0x02,0x0c]
	0x08,0x03,0x04,0xf1,0xf0,0x05,0x02,0x0c			0x08,0x03,0x04,0xf1,0xf0,0x05,0x02,0x0c

	# GFX11: image_sample_d_cl v[5:6], v[1:8], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D a16 ; encoding: [0x08,0x03,0x05,0xf1,0x01,0x05,0x02,0x0c]			# GFX11: image_sample_d_cl v[5:6], v[1:8], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D a16 ; encoding: [0x08,0x03,0x05,0xf1,0x01,0x05,0x02,0x0c]
	0x08,0x03,0x05,0xf1,0x01,0x05,0x02,0x0c			0x08,0x03,0x05,0xf1,0x01,0x05,0x02,0x0c

	# GFX11: image_sample_d_cl v[5:6], v[248:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D a16 ; encoding: [0x08,0x03,0x05,0xf1,0xf8,0x05,0x02,0x0c]			# GFX11: image_sample_d_cl v[5:6], v[248:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D a16 ; encoding: [0x08,0x03,0x05,0xf1,0xf8,0x05,0x02,0x0c]
	0x08,0x03,0x05,0xf1,0xf8,0x05,0x02,0x0c			0x08,0x03,0x05,0xf1,0xf8,0x05,0x02,0x0c

	▲ Show 20 Lines • Show All 124 Lines • ▼ Show 20 Lines
	0x00,0x03,0x1c,0xf1,0xfb,0x05,0x02,0x0c			0x00,0x03,0x1c,0xf1,0xfb,0x05,0x02,0x0c

	# GFX11: image_sample_d_cl_o v[5:6], v[1:4], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_1D a16 ; encoding: [0x00,0x03,0x1d,0xf1,0x01,0x05,0x02,0x0c]			# GFX11: image_sample_d_cl_o v[5:6], v[1:4], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_1D a16 ; encoding: [0x00,0x03,0x1d,0xf1,0x01,0x05,0x02,0x0c]
	0x00,0x03,0x1d,0xf1,0x01,0x05,0x02,0x0c			0x00,0x03,0x1d,0xf1,0x01,0x05,0x02,0x0c

	# GFX11: image_sample_d_cl_o v[5:6], v[252:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_1D a16 ; encoding: [0x00,0x03,0x1d,0xf1,0xfc,0x05,0x02,0x0c]			# GFX11: image_sample_d_cl_o v[5:6], v[252:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_1D a16 ; encoding: [0x00,0x03,0x1d,0xf1,0xfc,0x05,0x02,0x0c]
	0x00,0x03,0x1d,0xf1,0xfc,0x05,0x02,0x0c			0x00,0x03,0x1d,0xf1,0xfc,0x05,0x02,0x0c

	# GFX11: image_sample_d_cl_o v[5:6], v[1:16], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D ; encoding: [0x08,0x03,0x1c,0xf1,0x01,0x05,0x02,0x0c]			# GFX11: image_sample_d_cl_o v[5:6], v[1:11], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D ; encoding: [0x08,0x03,0x1c,0xf1,0x01,0x05,0x02,0x0c]
	0x08,0x03,0x1c,0xf1,0x01,0x05,0x02,0x0c			0x08,0x03,0x1c,0xf1,0x01,0x05,0x02,0x0c

	# GFX11: image_sample_d_cl_o v[5:6], v[240:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D ; encoding: [0x08,0x03,0x1c,0xf1,0xf0,0x05,0x02,0x0c]			# GFX11: image_sample_d_cl_o v[5:6], v[240:250], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D ; encoding: [0x08,0x03,0x1c,0xf1,0xf0,0x05,0x02,0x0c]
	0x08,0x03,0x1c,0xf1,0xf0,0x05,0x02,0x0c			0x08,0x03,0x1c,0xf1,0xf0,0x05,0x02,0x0c

	# GFX11: image_sample_d_cl_o v[5:6], v[1:16], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D a16 ; encoding: [0x08,0x03,0x1d,0xf1,0x01,0x05,0x02,0x0c]			# GFX11: image_sample_d_cl_o v[5:6], v[1:9], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D a16 ; encoding: [0x08,0x03,0x1d,0xf1,0x01,0x05,0x02,0x0c]
	0x08,0x03,0x1d,0xf1,0x01,0x05,0x02,0x0c			0x08,0x03,0x1d,0xf1,0x01,0x05,0x02,0x0c

	# GFX11: image_sample_d_cl_o v[5:6], v[240:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D a16 ; encoding: [0x08,0x03,0x1d,0xf1,0xf0,0x05,0x02,0x0c]			# GFX11: image_sample_d_cl_o v[5:6], v[240:248], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D a16 ; encoding: [0x08,0x03,0x1d,0xf1,0xf0,0x05,0x02,0x0c]
	0x08,0x03,0x1d,0xf1,0xf0,0x05,0x02,0x0c			0x08,0x03,0x1d,0xf1,0xf0,0x05,0x02,0x0c

	# GFX11: image_sample_d_cl_o v[5:6], v[1:8], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_2D ; encoding: [0x04,0x03,0x1c,0xf1,0x01,0x05,0x02,0x0c]			# GFX11: image_sample_d_cl_o v[5:6], v[1:8], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_2D ; encoding: [0x04,0x03,0x1c,0xf1,0x01,0x05,0x02,0x0c]
	0x04,0x03,0x1c,0xf1,0x01,0x05,0x02,0x0c			0x04,0x03,0x1c,0xf1,0x01,0x05,0x02,0x0c

	# GFX11: image_sample_d_cl_o v[254:255], v[248:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_2D ; encoding: [0x04,0x03,0x1c,0xf1,0xf8,0xfe,0x02,0x0c]			# GFX11: image_sample_d_cl_o v[254:255], v[248:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_2D ; encoding: [0x04,0x03,0x1c,0xf1,0xf8,0xfe,0x02,0x0c]
	0x04,0x03,0x1c,0xf1,0xf8,0xfe,0x02,0x0c			0x04,0x03,0x1c,0xf1,0xf8,0xfe,0x02,0x0c

	# GFX11: image_sample_d_cl_o v[5:7], v[1:7], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_2D a16 tfe ; encoding: [0x04,0x03,0x1d,0xf1,0x01,0x05,0x22,0x0c]			# GFX11: image_sample_d_cl_o v[5:7], v[1:7], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_2D a16 tfe ; encoding: [0x04,0x03,0x1d,0xf1,0x01,0x05,0x22,0x0c]
	0x04,0x03,0x1d,0xf1,0x01,0x05,0x22,0x0c			0x04,0x03,0x1d,0xf1,0x01,0x05,0x22,0x0c

	# GFX11: image_sample_d_cl_o v[253:255], v[249:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_2D a16 tfe ; encoding: [0x04,0x03,0x1d,0xf1,0xf9,0xfd,0x22,0x0c]			# GFX11: image_sample_d_cl_o v[253:255], v[249:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_2D a16 tfe ; encoding: [0x04,0x03,0x1d,0xf1,0xf9,0xfd,0x22,0x0c]
	0x04,0x03,0x1d,0xf1,0xf9,0xfd,0x22,0x0c			0x04,0x03,0x1d,0xf1,0xf9,0xfd,0x22,0x0c

	# GFX11: image_sample_d_cl_o v5, v[1:16], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_CUBE d16 ; encoding: [0x0c,0x03,0x1e,0xf1,0x01,0x05,0x02,0x0c]			# GFX11: image_sample_d_cl_o v5, v[1:9], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_CUBE d16 ; encoding: [0x0c,0x03,0x1e,0xf1,0x01,0x05,0x02,0x0c]
	0x0c,0x03,0x1e,0xf1,0x01,0x05,0x02,0x0c			0x0c,0x03,0x1e,0xf1,0x01,0x05,0x02,0x0c

	# GFX11: image_sample_d_cl_o v255, v[240:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_CUBE d16 ; encoding: [0x0c,0x03,0x1e,0xf1,0xf0,0xff,0x02,0x0c]			# GFX11: image_sample_d_cl_o v255, v[240:248], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_CUBE d16 ; encoding: [0x0c,0x03,0x1e,0xf1,0xf0,0xff,0x02,0x0c]
	0x0c,0x03,0x1e,0xf1,0xf0,0xff,0x02,0x0c			0x0c,0x03,0x1e,0xf1,0xf0,0xff,0x02,0x0c

	# GFX11: image_sample_d_cl_o v[5:6], v[1:7], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_CUBE a16 tfe d16 ; encoding: [0x0c,0x03,0x1f,0xf1,0x01,0x05,0x22,0x0c]			# GFX11: image_sample_d_cl_o v[5:6], v[1:7], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_CUBE a16 tfe d16 ; encoding: [0x0c,0x03,0x1f,0xf1,0x01,0x05,0x22,0x0c]
	0x0c,0x03,0x1f,0xf1,0x01,0x05,0x22,0x0c			0x0c,0x03,0x1f,0xf1,0x01,0x05,0x22,0x0c

	# GFX11: image_sample_d_cl_o v[254:255], v[249:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_CUBE a16 tfe d16 ; encoding: [0x0c,0x03,0x1f,0xf1,0xf9,0xfe,0x22,0x0c]			# GFX11: image_sample_d_cl_o v[254:255], v[249:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_CUBE a16 tfe d16 ; encoding: [0x0c,0x03,0x1f,0xf1,0xf9,0xfe,0x22,0x0c]
	0x0c,0x03,0x1f,0xf1,0xf9,0xfe,0x22,0x0c			0x0c,0x03,0x1f,0xf1,0xf9,0xfe,0x22,0x0c

	# GFX11: image_sample_d_cl_o v5, v[1:6], s[8:15], s[12:15] dmask:0x4 dim:SQ_RSRC_IMG_1D_ARRAY ; encoding: [0x10,0x04,0x1c,0xf1,0x01,0x05,0x02,0x0c]			# GFX11: image_sample_d_cl_o v5, v[1:6], s[8:15], s[12:15] dmask:0x4 dim:SQ_RSRC_IMG_1D_ARRAY ; encoding: [0x10,0x04,0x1c,0xf1,0x01,0x05,0x02,0x0c]
	0x10,0x04,0x1c,0xf1,0x01,0x05,0x02,0x0c			0x10,0x04,0x1c,0xf1,0x01,0x05,0x02,0x0c

	# GFX11: image_sample_d_cl_o v255, v[250:255], s[8:15], s[12:15] dmask:0x4 dim:SQ_RSRC_IMG_1D_ARRAY ; encoding: [0x10,0x04,0x1c,0xf1,0xfa,0xff,0x02,0x0c]			# GFX11: image_sample_d_cl_o v255, v[250:255], s[8:15], s[12:15] dmask:0x4 dim:SQ_RSRC_IMG_1D_ARRAY ; encoding: [0x10,0x04,0x1c,0xf1,0xfa,0xff,0x02,0x0c]
	0x10,0x04,0x1c,0xf1,0xfa,0xff,0x02,0x0c			0x10,0x04,0x1c,0xf1,0xfa,0xff,0x02,0x0c

	# GFX11: image_sample_d_cl_o v[5:6], v[1:5], s[8:15], s[12:15] dmask:0x4 dim:SQ_RSRC_IMG_1D_ARRAY a16 tfe ; encoding: [0x10,0x04,0x1d,0xf1,0x01,0x05,0x22,0x0c]			# GFX11: image_sample_d_cl_o v[5:6], v[1:5], s[8:15], s[12:15] dmask:0x4 dim:SQ_RSRC_IMG_1D_ARRAY a16 tfe ; encoding: [0x10,0x04,0x1d,0xf1,0x01,0x05,0x22,0x0c]
	0x10,0x04,0x1d,0xf1,0x01,0x05,0x22,0x0c			0x10,0x04,0x1d,0xf1,0x01,0x05,0x22,0x0c

	# GFX11: image_sample_d_cl_o v[254:255], v[251:255], s[8:15], s[12:15] dmask:0x4 dim:SQ_RSRC_IMG_1D_ARRAY a16 tfe ; encoding: [0x10,0x04,0x1d,0xf1,0xfb,0xfe,0x22,0x0c]			# GFX11: image_sample_d_cl_o v[254:255], v[251:255], s[8:15], s[12:15] dmask:0x4 dim:SQ_RSRC_IMG_1D_ARRAY a16 tfe ; encoding: [0x10,0x04,0x1d,0xf1,0xfb,0xfe,0x22,0x0c]
	0x10,0x04,0x1d,0xf1,0xfb,0xfe,0x22,0x0c			0x10,0x04,0x1d,0xf1,0xfb,0xfe,0x22,0x0c

	# GFX11: image_sample_d_cl_o v5, v[1:16], s[8:15], s[12:15] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY d16 ; encoding: [0x14,0x04,0x1e,0xf1,0x01,0x05,0x02,0x0c]			# GFX11: image_sample_d_cl_o v5, v[1:9], s[8:15], s[12:15] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY d16 ; encoding: [0x14,0x04,0x1e,0xf1,0x01,0x05,0x02,0x0c]
	0x14,0x04,0x1e,0xf1,0x01,0x05,0x02,0x0c			0x14,0x04,0x1e,0xf1,0x01,0x05,0x02,0x0c

	# GFX11: image_sample_d_cl_o v255, v[240:255], s[8:15], s[12:15] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY d16 ; encoding: [0x14,0x04,0x1e,0xf1,0xf0,0xff,0x02,0x0c]			# GFX11: image_sample_d_cl_o v255, v[240:248], s[8:15], s[12:15] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY d16 ; encoding: [0x14,0x04,0x1e,0xf1,0xf0,0xff,0x02,0x0c]
	0x14,0x04,0x1e,0xf1,0xf0,0xff,0x02,0x0c			0x14,0x04,0x1e,0xf1,0xf0,0xff,0x02,0x0c

	# GFX11: image_sample_d_cl_o v[5:6], v[1:7], s[96:103], s[100:103] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY a16 tfe d16 ; encoding: [0x14,0x04,0x1f,0xf1,0x01,0x05,0x38,0x64]			# GFX11: image_sample_d_cl_o v[5:6], v[1:7], s[96:103], s[100:103] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY a16 tfe d16 ; encoding: [0x14,0x04,0x1f,0xf1,0x01,0x05,0x38,0x64]
	0x14,0x04,0x1f,0xf1,0x01,0x05,0x38,0x64			0x14,0x04,0x1f,0xf1,0x01,0x05,0x38,0x64

	# GFX11: image_sample_d_cl_o v[254:255], v[249:255], ttmp[8:15], ttmp[12:15] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY unorm glc slc dlc a16 tfe lwe d16 ; encoding: [0x94,0x74,0x1f,0xf1,0xf9,0xfe,0x7d,0x78]			# GFX11: image_sample_d_cl_o v[254:255], v[249:255], ttmp[8:15], ttmp[12:15] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY unorm glc slc dlc a16 tfe lwe d16 ; encoding: [0x94,0x74,0x1f,0xf1,0xf9,0xfe,0x7d,0x78]
	0x94,0x74,0x1f,0xf1,0xf9,0xfe,0x7d,0x78			0x94,0x74,0x1f,0xf1,0xf9,0xfe,0x7d,0x78

	# GFX11: image_sample_d_cl_o_g16 v[5:6], v[1:5], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_1D ; encoding: [0x00,0x03,0x54,0xf1,0x01,0x05,0x02,0x0c]			# GFX11: image_sample_d_cl_o_g16 v[5:6], v[1:5], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_1D ; encoding: [0x00,0x03,0x54,0xf1,0x01,0x05,0x02,0x0c]
	0x00,0x03,0x54,0xf1,0x01,0x05,0x02,0x0c			0x00,0x03,0x54,0xf1,0x01,0x05,0x02,0x0c

	# GFX11: image_sample_d_cl_o_g16 v[5:6], v[251:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_1D ; encoding: [0x00,0x03,0x54,0xf1,0xfb,0x05,0x02,0x0c]			# GFX11: image_sample_d_cl_o_g16 v[5:6], v[251:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_1D ; encoding: [0x00,0x03,0x54,0xf1,0xfb,0x05,0x02,0x0c]
	0x00,0x03,0x54,0xf1,0xfb,0x05,0x02,0x0c			0x00,0x03,0x54,0xf1,0xfb,0x05,0x02,0x0c

	# GFX11: image_sample_d_cl_o_g16 v[5:6], v[1:4], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_1D a16 ; encoding: [0x00,0x03,0x55,0xf1,0x01,0x05,0x02,0x0c]			# GFX11: image_sample_d_cl_o_g16 v[5:6], v[1:4], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_1D a16 ; encoding: [0x00,0x03,0x55,0xf1,0x01,0x05,0x02,0x0c]
	0x00,0x03,0x55,0xf1,0x01,0x05,0x02,0x0c			0x00,0x03,0x55,0xf1,0x01,0x05,0x02,0x0c

	# GFX11: image_sample_d_cl_o_g16 v[5:6], v[252:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_1D a16 ; encoding: [0x00,0x03,0x55,0xf1,0xfc,0x05,0x02,0x0c]			# GFX11: image_sample_d_cl_o_g16 v[5:6], v[252:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_1D a16 ; encoding: [0x00,0x03,0x55,0xf1,0xfc,0x05,0x02,0x0c]
	0x00,0x03,0x55,0xf1,0xfc,0x05,0x02,0x0c			0x00,0x03,0x55,0xf1,0xfc,0x05,0x02,0x0c

	# GFX11: image_sample_d_cl_o_g16 v[5:6], v[1:16], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D ; encoding: [0x08,0x03,0x54,0xf1,0x01,0x05,0x02,0x0c]			# GFX11: image_sample_d_cl_o_g16 v[5:6], v[1:9], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D ; encoding: [0x08,0x03,0x54,0xf1,0x01,0x05,0x02,0x0c]
	0x08,0x03,0x54,0xf1,0x01,0x05,0x02,0x0c			0x08,0x03,0x54,0xf1,0x01,0x05,0x02,0x0c

	# GFX11: image_sample_d_cl_o_g16 v[5:6], v[240:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D ; encoding: [0x08,0x03,0x54,0xf1,0xf0,0x05,0x02,0x0c]			# GFX11: image_sample_d_cl_o_g16 v[5:6], v[240:248], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D ; encoding: [0x08,0x03,0x54,0xf1,0xf0,0x05,0x02,0x0c]
	0x08,0x03,0x54,0xf1,0xf0,0x05,0x02,0x0c			0x08,0x03,0x54,0xf1,0xf0,0x05,0x02,0x0c

	# GFX11: image_sample_d_cl_o_g16 v[5:6], v[1:7], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D a16 ; encoding: [0x08,0x03,0x55,0xf1,0x01,0x05,0x02,0x0c]			# GFX11: image_sample_d_cl_o_g16 v[5:6], v[1:7], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D a16 ; encoding: [0x08,0x03,0x55,0xf1,0x01,0x05,0x02,0x0c]
	0x08,0x03,0x55,0xf1,0x01,0x05,0x02,0x0c			0x08,0x03,0x55,0xf1,0x01,0x05,0x02,0x0c

	# GFX11: image_sample_d_cl_o_g16 v[5:6], v[249:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D a16 ; encoding: [0x08,0x03,0x55,0xf1,0xf9,0x05,0x02,0x0c]			# GFX11: image_sample_d_cl_o_g16 v[5:6], v[249:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D a16 ; encoding: [0x08,0x03,0x55,0xf1,0xf9,0x05,0x02,0x0c]
	0x08,0x03,0x55,0xf1,0xf9,0x05,0x02,0x0c			0x08,0x03,0x55,0xf1,0xf9,0x05,0x02,0x0c

	▲ Show 20 Lines • Show All 124 Lines • ▼ Show 20 Lines
	0x00,0x03,0x98,0xf0,0xfc,0x05,0x02,0x0c			0x00,0x03,0x98,0xf0,0xfc,0x05,0x02,0x0c

	# GFX11: image_sample_d_o v[5:6], v[1:4], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_1D a16 ; encoding: [0x00,0x03,0x99,0xf0,0x01,0x05,0x02,0x0c]			# GFX11: image_sample_d_o v[5:6], v[1:4], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_1D a16 ; encoding: [0x00,0x03,0x99,0xf0,0x01,0x05,0x02,0x0c]
	0x00,0x03,0x99,0xf0,0x01,0x05,0x02,0x0c			0x00,0x03,0x99,0xf0,0x01,0x05,0x02,0x0c

	# GFX11: image_sample_d_o v[5:6], v[252:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_1D a16 ; encoding: [0x00,0x03,0x99,0xf0,0xfc,0x05,0x02,0x0c]			# GFX11: image_sample_d_o v[5:6], v[252:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_1D a16 ; encoding: [0x00,0x03,0x99,0xf0,0xfc,0x05,0x02,0x0c]
	0x00,0x03,0x99,0xf0,0xfc,0x05,0x02,0x0c			0x00,0x03,0x99,0xf0,0xfc,0x05,0x02,0x0c

	# GFX11: image_sample_d_o v[5:6], v[1:16], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D ; encoding: [0x08,0x03,0x98,0xf0,0x01,0x05,0x02,0x0c]			# GFX11: image_sample_d_o v[5:6], v[1:10], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D ; encoding: [0x08,0x03,0x98,0xf0,0x01,0x05,0x02,0x0c]
	0x08,0x03,0x98,0xf0,0x01,0x05,0x02,0x0c			0x08,0x03,0x98,0xf0,0x01,0x05,0x02,0x0c

	# GFX11: image_sample_d_o v[5:6], v[240:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D ; encoding: [0x08,0x03,0x98,0xf0,0xf0,0x05,0x02,0x0c]			# GFX11: image_sample_d_o v[5:6], v[240:249], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D ; encoding: [0x08,0x03,0x98,0xf0,0xf0,0x05,0x02,0x0c]
	0x08,0x03,0x98,0xf0,0xf0,0x05,0x02,0x0c			0x08,0x03,0x98,0xf0,0xf0,0x05,0x02,0x0c

	# GFX11: image_sample_d_o v[5:6], v[1:16], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D a16 ; encoding: [0x08,0x03,0x99,0xf0,0x01,0x05,0x02,0x0c]			# GFX11: image_sample_d_o v[5:6], v[1:9], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D a16 ; encoding: [0x08,0x03,0x99,0xf0,0x01,0x05,0x02,0x0c]
	0x08,0x03,0x99,0xf0,0x01,0x05,0x02,0x0c			0x08,0x03,0x99,0xf0,0x01,0x05,0x02,0x0c

	# GFX11: image_sample_d_o v[5:6], v[240:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D a16 ; encoding: [0x08,0x03,0x99,0xf0,0xf0,0x05,0x02,0x0c]			# GFX11: image_sample_d_o v[5:6], v[240:248], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D a16 ; encoding: [0x08,0x03,0x99,0xf0,0xf0,0x05,0x02,0x0c]
	0x08,0x03,0x99,0xf0,0xf0,0x05,0x02,0x0c			0x08,0x03,0x99,0xf0,0xf0,0x05,0x02,0x0c

	# GFX11: image_sample_d_o v[5:6], v[1:7], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_2D ; encoding: [0x04,0x03,0x98,0xf0,0x01,0x05,0x02,0x0c]			# GFX11: image_sample_d_o v[5:6], v[1:7], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_2D ; encoding: [0x04,0x03,0x98,0xf0,0x01,0x05,0x02,0x0c]
	0x04,0x03,0x98,0xf0,0x01,0x05,0x02,0x0c			0x04,0x03,0x98,0xf0,0x01,0x05,0x02,0x0c

	# GFX11: image_sample_d_o v[254:255], v[249:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_2D ; encoding: [0x04,0x03,0x98,0xf0,0xf9,0xfe,0x02,0x0c]			# GFX11: image_sample_d_o v[254:255], v[249:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_2D ; encoding: [0x04,0x03,0x98,0xf0,0xf9,0xfe,0x02,0x0c]
	0x04,0x03,0x98,0xf0,0xf9,0xfe,0x02,0x0c			0x04,0x03,0x98,0xf0,0xf9,0xfe,0x02,0x0c

	▲ Show 20 Lines • Show All 809 Lines • Show Last 20 Lines

llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_mimg_features.txt

	Show First 20 Lines • Show All 156 Lines • ▼ Show 20 Lines
	0x15,0x07,0x00,0xf1,0x20,0x40,0x01,0x64,0x10,0x14,0x15,0x00			0x15,0x07,0x00,0xf1,0x20,0x40,0x01,0x64,0x10,0x14,0x15,0x00

	# GFX11: image_sample_d v[64:66], [v32, v16, v8], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_1D ; encoding: [0x01,0x07,0x70,0xf0,0x20,0x40,0x01,0x64,0x10,0x08,0x00,0x00]			# GFX11: image_sample_d v[64:66], [v32, v16, v8], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_1D ; encoding: [0x01,0x07,0x70,0xf0,0x20,0x40,0x01,0x64,0x10,0x08,0x00,0x00]
	0x01,0x07,0x70,0xf0,0x20,0x40,0x01,0x64,0x10,0x08,0x00,0x00			0x01,0x07,0x70,0xf0,0x20,0x40,0x01,0x64,0x10,0x08,0x00,0x00

	# GFX11: image_sample_d v[64:66], v[32:37], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_2D ; encoding: [0x04,0x07,0x70,0xf0,0x20,0x40,0x01,0x64]			# GFX11: image_sample_d v[64:66], v[32:37], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_2D ; encoding: [0x04,0x07,0x70,0xf0,0x20,0x40,0x01,0x64]
	0x04,0x07,0x70,0xf0,0x20,0x40,0x01,0x64			0x04,0x07,0x70,0xf0,0x20,0x40,0x01,0x64

	# GFX11: image_sample_d v[64:66], v[32:47], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_3D ; encoding: [0x08,0x07,0x70,0xf0,0x20,0x40,0x01,0x64]			# GFX11: image_sample_d v[64:66], v[32:40], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_3D ; encoding: [0x08,0x07,0x70,0xf0,0x20,0x40,0x01,0x64]
	0x08,0x07,0x70,0xf0,0x20,0x40,0x01,0x64			0x08,0x07,0x70,0xf0,0x20,0x40,0x01,0x64

	# GFX11: image_sample_d v[64:66], [v32, v16, v8, v4], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_1D_ARRAY ; encoding: [0x11,0x07,0x70,0xf0,0x20,0x40,0x01,0x64,0x10,0x08,0x04,0x00]			# GFX11: image_sample_d v[64:66], [v32, v16, v8, v4], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_1D_ARRAY ; encoding: [0x11,0x07,0x70,0xf0,0x20,0x40,0x01,0x64,0x10,0x08,0x04,0x00]
	0x11,0x07,0x70,0xf0,0x20,0x40,0x01,0x64,0x10,0x08,0x04,0x00			0x11,0x07,0x70,0xf0,0x20,0x40,0x01,0x64,0x10,0x08,0x04,0x00

	# GFX11: image_sample_l v[64:66], [v32, v16, v8], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_1D_ARRAY ; encoding: [0x11,0x07,0x74,0xf0,0x20,0x40,0x01,0x64,0x10,0x08,0x00,0x00]			# GFX11: image_sample_l v[64:66], [v32, v16, v8], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_1D_ARRAY ; encoding: [0x11,0x07,0x74,0xf0,0x20,0x40,0x01,0x64,0x10,0x08,0x00,0x00]
	0x11,0x07,0x74,0xf0,0x20,0x40,0x01,0x64,0x10,0x08,0x00,0x00			0x11,0x07,0x74,0xf0,0x20,0x40,0x01,0x64,0x10,0x08,0x00,0x00

	▲ Show 20 Lines • Show All 103 Lines • ▼ Show 20 Lines
	0x1c,0x02,0x60,0xf0,0x05,0x01,0x02,0x00			0x1c,0x02,0x60,0xf0,0x05,0x01,0x02,0x00

	# GFX11: image_msaa_load v[1:2], v[5:8], s[8:15] dmask:0x2 dim:SQ_RSRC_IMG_2D_MSAA_ARRAY d16 ; encoding: [0x1c,0x02,0x62,0xf0,0x05,0x01,0x02,0x00]			# GFX11: image_msaa_load v[1:2], v[5:8], s[8:15] dmask:0x2 dim:SQ_RSRC_IMG_2D_MSAA_ARRAY d16 ; encoding: [0x1c,0x02,0x62,0xf0,0x05,0x01,0x02,0x00]
	0x1c,0x02,0x62,0xf0,0x05,0x01,0x02,0x00			0x1c,0x02,0x62,0xf0,0x05,0x01,0x02,0x00

	# GFX11: image_msaa_load v[10:13], [v204, v11, v14, v19], s[40:47] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA_ARRAY ; encoding: [0x1d,0x01,0x60,0xf0,0xcc,0x0a,0x0a,0x00,0x0b,0x0e,0x13,0x00]			# GFX11: image_msaa_load v[10:13], [v204, v11, v14, v19], s[40:47] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA_ARRAY ; encoding: [0x1d,0x01,0x60,0xf0,0xcc,0x0a,0x0a,0x00,0x0b,0x0e,0x13,0x00]
	0x1d,0x01,0x60,0xf0,0xcc,0x0a,0x0a,0x00,0x0b,0x0e,0x13,0x00			0x1d,0x01,0x60,0xf0,0xcc,0x0a,0x0a,0x00,0x0b,0x0e,0x13,0x00

	# GFX11: image_bvh_intersect_ray v[4:7], v[9:24], s[4:7] ; encoding: [0x80,0x8f,0x64,0xf0,0x09,0x04,0x01,0x00]			# GFX11: image_bvh_intersect_ray v[4:7], v[9:19], s[4:7] ; encoding: [0x80,0x8f,0x64,0xf0,0x09,0x04,0x01,0x00]
	0x80,0x8f,0x64,0xf0,0x09,0x04,0x01,0x00			0x80,0x8f,0x64,0xf0,0x09,0x04,0x01,0x00

	# GFX11: image_bvh_intersect_ray v[4:7], v[9:16], s[4:7] a16 ; encoding: [0x80,0x8f,0x65,0xf0,0x09,0x04,0x01,0x00]			# GFX11: image_bvh_intersect_ray v[4:7], v[9:16], s[4:7] a16 ; encoding: [0x80,0x8f,0x65,0xf0,0x09,0x04,0x01,0x00]
	0x80,0x8f,0x65,0xf0,0x09,0x04,0x01,0x00			0x80,0x8f,0x65,0xf0,0x09,0x04,0x01,0x00

	# GFX11: image_bvh64_intersect_ray v[4:7], v[9:24], s[4:7] ; encoding: [0x80,0x8f,0x68,0xf0,0x09,0x04,0x01,0x00]			# GFX11: image_bvh64_intersect_ray v[4:7], v[9:20], s[4:7] ; encoding: [0x80,0x8f,0x68,0xf0,0x09,0x04,0x01,0x00]
	0x80,0x8f,0x68,0xf0,0x09,0x04,0x01,0x00			0x80,0x8f,0x68,0xf0,0x09,0x04,0x01,0x00

	# GFX11: image_bvh64_intersect_ray v[4:7], v[9:24], s[4:7] a16 ; encoding: [0x80,0x8f,0x69,0xf0,0x09,0x04,0x01,0x00]			# GFX11: image_bvh64_intersect_ray v[4:7], v[9:17], s[4:7] a16 ; encoding: [0x80,0x8f,0x69,0xf0,0x09,0x04,0x01,0x00]
	0x80,0x8f,0x69,0xf0,0x09,0x04,0x01,0x00			0x80,0x8f,0x69,0xf0,0x09,0x04,0x01,0x00

	# GFX11: image_bvh_intersect_ray v[39:42], [v50, v46, v[20:22], v[40:42], v[47:49]], s[12:15] ; encoding: [0x81,0x8f,0x64,0xf0,0x32,0x27,0x03,0x00,0x2e,0x14,0x28,0x2f]			# GFX11: image_bvh_intersect_ray v[39:42], [v50, v46, v[20:22], v[40:42], v[47:49]], s[12:15] ; encoding: [0x81,0x8f,0x64,0xf0,0x32,0x27,0x03,0x00,0x2e,0x14,0x28,0x2f]
	0x81,0x8f,0x64,0xf0,0x32,0x27,0x03,0x00,0x2e,0x14,0x28,0x2f			0x81,0x8f,0x64,0xf0,0x32,0x27,0x03,0x00,0x2e,0x14,0x28,0x2f

	# GFX11: image_bvh_intersect_ray v[39:42], [v50, v46, v[20:22], v[40:42]], s[12:15] a16 ; encoding: [0x81,0x8f,0x65,0xf0,0x32,0x27,0x03,0x00,0x2e,0x14,0x28,0x00]			# GFX11: image_bvh_intersect_ray v[39:42], [v50, v46, v[20:22], v[40:42]], s[12:15] a16 ; encoding: [0x81,0x8f,0x65,0xf0,0x32,0x27,0x03,0x00,0x2e,0x14,0x28,0x00]
	0x81,0x8f,0x65,0xf0,0x32,0x27,0x03,0x00,0x2e,0x14,0x28,0x00			0x81,0x8f,0x65,0xf0,0x32,0x27,0x03,0x00,0x2e,0x14,0x28,0x00

	# GFX11: image_bvh64_intersect_ray v[39:42], [v[50:51], v46, v[20:22], v[40:42], v[47:49]], s[12:15] ; encoding: [0x81,0x8f,0x68,0xf0,0x32,0x27,0x03,0x00,0x2e,0x14,0x28,0x2f]			# GFX11: image_bvh64_intersect_ray v[39:42], [v[50:51], v46, v[20:22], v[40:42], v[47:49]], s[12:15] ; encoding: [0x81,0x8f,0x68,0xf0,0x32,0x27,0x03,0x00,0x2e,0x14,0x28,0x2f]
	0x81,0x8f,0x68,0xf0,0x32,0x27,0x03,0x00,0x2e,0x14,0x28,0x2f			0x81,0x8f,0x68,0xf0,0x32,0x27,0x03,0x00,0x2e,0x14,0x28,0x2f

	# GFX11: image_bvh64_intersect_ray v[39:42], [v[50:51], v46, v[20:22], v[40:42]], s[12:15] a16 ; encoding: [0x81,0x8f,0x69,0xf0,0x32,0x27,0x03,0x00,0x2e,0x14,0x28,0x00]			# GFX11: image_bvh64_intersect_ray v[39:42], [v[50:51], v46, v[20:22], v[40:42]], s[12:15] a16 ; encoding: [0x81,0x8f,0x69,0xf0,0x32,0x27,0x03,0x00,0x2e,0x14,0x28,0x00]
	0x81,0x8f,0x69,0xf0,0x32,0x27,0x03,0x00,0x2e,0x14,0x28,0x00			0x81,0x8f,0x69,0xf0,0x32,0x27,0x03,0x00,0x2e,0x14,0x28,0x00

This is an archive of the discontinued LLVM Phabricator instance.

[AMDGPU] Add support for new LLVM vector types (Closed, Public)

Details

Diff Detail

Unit Tests: Failed

Event Timeline

Revision Contents

Diff 477134

llvm/lib/Target/AMDGPU/AMDGPUGenRegisterBankInfo.def

llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp

llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp

llvm/lib/Target/AMDGPU/AMDGPURegisterBanks.td

llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp

llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp

llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h

llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp

llvm/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp

llvm/lib/Target/AMDGPU/MIMGInstructions.td

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

llvm/lib/Target/AMDGPU/SIInstrInfo.cpp

llvm/lib/Target/AMDGPU/SIInstructions.td

llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp

llvm/lib/Target/AMDGPU/SIRegisterInfo.td

llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp

llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp

llvm/test/Analysis/CostModel/AMDGPU/add-sub.ll

llvm/test/Analysis/CostModel/AMDGPU/arith-ssat.ll

llvm/test/Analysis/CostModel/AMDGPU/arith-usat.ll

llvm/test/Analysis/CostModel/AMDGPU/fadd.ll

llvm/test/Analysis/CostModel/AMDGPU/fdiv.ll

llvm/test/Analysis/CostModel/AMDGPU/fma.ll

llvm/test/Analysis/CostModel/AMDGPU/fmul.ll

llvm/test/Analysis/CostModel/AMDGPU/fsub.ll

llvm/test/Analysis/CostModel/AMDGPU/mul.ll

llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.ll

llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-concat-vectors.mir

llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-unmerge-values.mir

llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.intersect_ray.ll

llvm/test/CodeGen/AMDGPU/coalescer-subreg-join.mir

llvm/test/CodeGen/AMDGPU/copy-illegal-type.ll

llvm/test/CodeGen/AMDGPU/copy-to-reg-scc-clobber.ll

llvm/test/CodeGen/AMDGPU/function-returns.ll

llvm/test/CodeGen/AMDGPU/indirect-addressing-si.ll

llvm/test/CodeGen/AMDGPU/insert_vector_dynelt.ll

llvm/test/CodeGen/AMDGPU/insert_vector_elt.ll

llvm/test/CodeGen/AMDGPU/ipra-regmask.ll

llvm/test/CodeGen/AMDGPU/kernel-args.ll

llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.nsa.ll

llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.dim.ll

llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.o.dim.ll

llvm/test/CodeGen/AMDGPU/llvm.amdgcn.intersect_ray.ll

llvm/test/CodeGen/AMDGPU/load-constant-i32.ll

llvm/test/CodeGen/AMDGPU/load-global-f32.ll

llvm/test/CodeGen/AMDGPU/load-global-i32.ll

llvm/test/CodeGen/AMDGPU/merge-image-sample-gfx11.mir

llvm/test/CodeGen/AMDGPU/select.f16.ll

llvm/test/CodeGen/AMDGPU/v_madak_f16.ll

llvm/test/CodeGen/AMDGPU/waitcnt-bvh.mir

llvm/test/MC/AMDGPU/gfx1013.s

llvm/test/MC/AMDGPU/gfx1030_new.s

llvm/test/MC/AMDGPU/gfx10_asm_mimg.s

llvm/test/MC/AMDGPU/gfx10_unsupported.s

llvm/test/MC/AMDGPU/gfx11_asm_mimg.s

llvm/test/MC/AMDGPU/gfx11_asm_mimg_features.s

llvm/test/MC/AMDGPU/gfx7_asm_mimg.s

llvm/test/MC/AMDGPU/gfx8_asm_mimg.s

llvm/test/MC/AMDGPU/gfx9_asm_mimg.s

llvm/test/MC/Disassembler/AMDGPU/gfx1030_new.txt

llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_mimg.txt

llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_mimg_features.txt

[AMDGPU] Add support for new LLVM vector types
Closed, Public