Diff 539154

llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 6,249 Lines • ▼ Show 20 Lines	LegalizerHelper::LegalizeResult LegalizerHelper::lowerFPTOSI(MachineInstr &MI) {

MI.eraseFromParent();		MI.eraseFromParent();
return Legalized;		return Legalized;
}		}

// f64 -> f16 conversion using round-to-nearest-even rounding mode.		// f64 -> f16 conversion using round-to-nearest-even rounding mode.
LegalizerHelper::LegalizeResult		LegalizerHelper::LegalizeResult
LegalizerHelper::lowerFPTRUNC_F64_TO_F16(MachineInstr &MI) {		LegalizerHelper::lowerFPTRUNC_F64_TO_F16(MachineInstr &MI) {
		const LLT S1 = LLT::scalar(1);
		const LLT S16 = LLT::scalar(16);
		const LLT S32 = LLT::scalar(32);
		const LLT S64 = LLT::scalar(64);

auto [Dst, Src] = MI.getFirst2Regs();		auto [Dst, Src] = MI.getFirst2Regs();
		assert(MRI.getType(Dst).getScalarType() == S16 &&
		MRI.getType(Src).getScalarType() == S64);
		chapuni (inline comment, not done): `S16` and `S64` are used only here, in the assert, so they are unused in -Asserts builds.
		kosarev (author, inline comment, done): Fixed in rGe705b2b1f4a7, thanks.

if (MRI.getType(Src).isVector()) // TODO: Handle vectors directly.		if (MRI.getType(Src).isVector()) // TODO: Handle vectors directly.
return UnableToLegalize;		return UnableToLegalize;

		if (MIRBuilder.getMF().getTarget().Options.UnsafeFPMath) {
		unsigned Flags = MI.getFlags();
		auto Src32 = MIRBuilder.buildFPTrunc(S32, Src, Flags);
		MIRBuilder.buildFPTrunc(Dst, Src32, Flags);
		MI.eraseFromParent();
		return Legalized;
		}

const unsigned ExpMask = 0x7ff;		const unsigned ExpMask = 0x7ff;
const unsigned ExpBiasf64 = 1023;		const unsigned ExpBiasf64 = 1023;
const unsigned ExpBiasf16 = 15;		const unsigned ExpBiasf16 = 15;
const LLT S32 = LLT::scalar(32);
const LLT S1 = LLT::scalar(1);

auto Unmerge = MIRBuilder.buildUnmerge(S32, Src);		auto Unmerge = MIRBuilder.buildUnmerge(S32, Src);
Register U = Unmerge.getReg(0);		Register U = Unmerge.getReg(0);
Register UH = Unmerge.getReg(1);		Register UH = Unmerge.getReg(1);

auto E = MIRBuilder.buildLShr(S32, UH, MIRBuilder.buildConstant(S32, 20));		auto E = MIRBuilder.buildLShr(S32, UH, MIRBuilder.buildConstant(S32, 20));
E = MIRBuilder.buildAnd(S32, E, MIRBuilder.buildConstant(S32, ExpMask));		E = MIRBuilder.buildAnd(S32, E, MIRBuilder.buildConstant(S32, ExpMask));

▲ Show 20 Lines • Show All 1,820 Lines • Show Last 20 Lines

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 17,028 Lines • ▼ Show 20 Lines	SDValue DAGCombiner::visitFP_ROUND(SDNode *N) {
if (N0.getOpcode() == ISD::FP_EXTEND && VT == N0.getOperand(0).getValueType())		if (N0.getOpcode() == ISD::FP_EXTEND && VT == N0.getOperand(0).getValueType())
return N0.getOperand(0);		return N0.getOperand(0);

// fold (fp_round (fp_round x)) -> (fp_round x)		// fold (fp_round (fp_round x)) -> (fp_round x)
if (N0.getOpcode() == ISD::FP_ROUND) {		if (N0.getOpcode() == ISD::FP_ROUND) {
const bool NIsTrunc = N->getConstantOperandVal(1) == 1;		const bool NIsTrunc = N->getConstantOperandVal(1) == 1;
const bool N0IsTrunc = N0.getConstantOperandVal(1) == 1;		const bool N0IsTrunc = N0.getConstantOperandVal(1) == 1;

		// Avoid folding legal fp_rounds into non-legal ones.
		if (!hasOperation(ISD::FP_ROUND, VT))
		return SDValue();

		kosarev (author, inline comment, done): This doesn't seem to cause any test failures here upstream, and it eliminates the need for the True16 `f16 = fp_round f64` pattern downstream. Not sure if we want that in a separate patch or should rather keep it here to provide some context. It looks problematic to give it a real test without having some True16 support.
		arsenm (inline comment, not done): I think you're supposed to be checking `LegalOperations`, not `LegalDAG`. Use the `hasOperation` helper?
		kosarev (author, inline comment, done): For some reason that I don't quite understand, `LegalOperations` gets raised as soon as vector operations are legalised: `LegalOperations = Level >= AfterLegalizeVectorOps;`. So relying on that flag would mean we forbid the fold during the `AfterLegalizeVectorOps` combine.
		foad (inline comment, not done): "So relying on that flag would mean we forbid the fold during the `AfterLegalizeVectorOps` combine." Why is that a problem?
		kosarev (author, inline comment, done): I'm thinking of cases where legalisation of vector operations may result in a chain of fp_rounds, which won't be combined before legalisation of non-vector operations because we forbid the fold too early.
		foad (inline comment, not done): LegalizeVectorOps mostly just scalarizes things, so it is unlikely to introduce fp_rounds unless there were already (vector-typed) fp_rounds in the input, which could have been combined earlier. So I don't think this will be a problem. I would go with `LegalOperations` unless/until you have a real example of a problem that we can look at.
// Skip this folding if it results in an fp_round from f80 to f16.		// Skip this folding if it results in an fp_round from f80 to f16.
//		//
// f80 to f16 always generates an expensive (and as yet, unimplemented)		// f80 to f16 always generates an expensive (and as yet, unimplemented)
// libcall to __truncxfhf2 instead of selecting native f16 conversion		// libcall to __truncxfhf2 instead of selecting native f16 conversion
// instructions from f32 or f64. Moreover, the first (value-preserving)		// instructions from f32 or f64. Moreover, the first (value-preserving)
// fp_round from f80 to either f32 or f64 may become a NOP in platforms like		// fp_round from f80 to either f32 or f64 may become a NOP in platforms like
// x86.		// x86.
if (N0.getOperand(0).getValueType() == MVT::f80 && VT == MVT::f16)		if (N0.getOperand(0).getValueType() == MVT::f80 && VT == MVT::f16)
▲ Show 20 Lines • Show All 10,501 Lines • Show Last 20 Lines

llvm/test/CodeGen/AMDGPU/fptrunc.f16.ll

	Show First 20 Lines • Show All 164 Lines • ▼ Show 20 Lines
	; VI-SDAG-NEXT: v_cvt_f32_f64_e32 v0, v[0:1]			; VI-SDAG-NEXT: v_cvt_f32_f64_e32 v0, v[0:1]
	; VI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0			; VI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
	; VI-SDAG-NEXT: buffer_store_short v0, off, s[4:7], 0			; VI-SDAG-NEXT: buffer_store_short v0, off, s[4:7], 0
	; VI-SDAG-NEXT: s_endpgm			; VI-SDAG-NEXT: s_endpgm
	;			;
	; VI-GISEL-LABEL: fptrunc_f64_to_f16:			; VI-GISEL-LABEL: fptrunc_f64_to_f16:
	; VI-GISEL: ; %bb.0: ; %entry			; VI-GISEL: ; %bb.0: ; %entry
	; VI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24			; VI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
	; VI-GISEL-NEXT: s_movk_i32 s4, 0x7e00
	; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0)			; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
	; VI-GISEL-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0			; VI-GISEL-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0
	; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0)			; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
	; VI-GISEL-NEXT: s_bfe_u32 s5, s3, 0xb0014			; VI-GISEL-NEXT: v_cvt_f32_f64_e32 v0, s[2:3]
	; VI-GISEL-NEXT: s_lshr_b32 s6, s3, 8
	; VI-GISEL-NEXT: s_and_b32 s7, s3, 0x1ff
	; VI-GISEL-NEXT: s_addk_i32 s5, 0xfc10
	; VI-GISEL-NEXT: s_and_b32 s6, s6, 0xffe
	; VI-GISEL-NEXT: s_or_b32 s2, s7, s2
	; VI-GISEL-NEXT: s_cmp_lg_u32 s2, 0
	; VI-GISEL-NEXT: s_cselect_b32 s2, 1, 0
	; VI-GISEL-NEXT: s_or_b32 s2, s6, s2
	; VI-GISEL-NEXT: s_cmp_lg_u32 s2, 0
	; VI-GISEL-NEXT: s_cselect_b32 s4, s4, 0x7c00
	; VI-GISEL-NEXT: s_sub_i32 s7, 1, s5
	; VI-GISEL-NEXT: s_lshl_b32 s6, s5, 12
	; VI-GISEL-NEXT: s_max_i32 s7, s7, 0
	; VI-GISEL-NEXT: s_or_b32 s6, s2, s6
	; VI-GISEL-NEXT: s_min_i32 s7, s7, 13
	; VI-GISEL-NEXT: s_bitset1_b32 s2, 12
	; VI-GISEL-NEXT: s_lshr_b32 s8, s2, s7
	; VI-GISEL-NEXT: s_lshl_b32 s7, s8, s7
	; VI-GISEL-NEXT: s_cmp_lg_u32 s7, s2
	; VI-GISEL-NEXT: s_cselect_b32 s2, 1, 0
	; VI-GISEL-NEXT: s_or_b32 s2, s8, s2
	; VI-GISEL-NEXT: s_cmp_lt_i32 s5, 1
	; VI-GISEL-NEXT: s_cselect_b32 s2, s2, s6
	; VI-GISEL-NEXT: s_and_b32 s6, s2, 7
	; VI-GISEL-NEXT: s_lshr_b32 s2, s2, 2
	; VI-GISEL-NEXT: s_cmp_eq_u32 s6, 3
	; VI-GISEL-NEXT: s_cselect_b32 s7, 1, 0
	; VI-GISEL-NEXT: s_cmp_gt_i32 s6, 5
	; VI-GISEL-NEXT: s_cselect_b32 s6, 1, 0
	; VI-GISEL-NEXT: s_or_b32 s6, s7, s6
	; VI-GISEL-NEXT: s_and_b32 s6, s6, 1
	; VI-GISEL-NEXT: s_add_i32 s2, s2, s6
	; VI-GISEL-NEXT: s_cmp_gt_i32 s5, 30
	; VI-GISEL-NEXT: s_cselect_b32 s2, 0x7c00, s2
	; VI-GISEL-NEXT: s_cmpk_eq_i32 s5, 0x40f
	; VI-GISEL-NEXT: s_cselect_b32 s2, s4, s2
	; VI-GISEL-NEXT: s_lshr_b32 s3, s3, 16
	; VI-GISEL-NEXT: s_and_b32 s3, s3, 0x8000
	; VI-GISEL-NEXT: s_or_b32 s2, s3, s2
	; VI-GISEL-NEXT: v_mov_b32_e32 v0, s2
	; VI-GISEL-NEXT: s_mov_b32 s2, -1			; VI-GISEL-NEXT: s_mov_b32 s2, -1
	; VI-GISEL-NEXT: s_mov_b32 s3, 0xf000			; VI-GISEL-NEXT: s_mov_b32 s3, 0xf000
				; VI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
	; VI-GISEL-NEXT: buffer_store_short v0, off, s[0:3], 0			; VI-GISEL-NEXT: buffer_store_short v0, off, s[0:3], 0
	; VI-GISEL-NEXT: s_endpgm			; VI-GISEL-NEXT: s_endpgm
	;			;
	; GFX9-SDAG-LABEL: fptrunc_f64_to_f16:			; GFX9-SDAG-LABEL: fptrunc_f64_to_f16:
	; GFX9-SDAG: ; %bb.0: ; %entry			; GFX9-SDAG: ; %bb.0: ; %entry
	; GFX9-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24			; GFX9-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
	; GFX9-SDAG-NEXT: s_mov_b32 s7, 0xf000			; GFX9-SDAG-NEXT: s_mov_b32 s7, 0xf000
	; GFX9-SDAG-NEXT: s_mov_b32 s6, -1			; GFX9-SDAG-NEXT: s_mov_b32 s6, -1
	Show All 9 Lines
	; GFX9-SDAG-NEXT: v_cvt_f32_f64_e32 v0, v[0:1]			; GFX9-SDAG-NEXT: v_cvt_f32_f64_e32 v0, v[0:1]
	; GFX9-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0			; GFX9-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
	; GFX9-SDAG-NEXT: buffer_store_short v0, off, s[4:7], 0			; GFX9-SDAG-NEXT: buffer_store_short v0, off, s[4:7], 0
	; GFX9-SDAG-NEXT: s_endpgm			; GFX9-SDAG-NEXT: s_endpgm
	;			;
	; GFX9-GISEL-LABEL: fptrunc_f64_to_f16:			; GFX9-GISEL-LABEL: fptrunc_f64_to_f16:
	; GFX9-GISEL: ; %bb.0: ; %entry			; GFX9-GISEL: ; %bb.0: ; %entry
	; GFX9-GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24			; GFX9-GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
	; GFX9-GISEL-NEXT: s_movk_i32 s4, 0x7e00
	; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0)			; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0)
	; GFX9-GISEL-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0			; GFX9-GISEL-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0
	; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0)			; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0)
	; GFX9-GISEL-NEXT: s_bfe_u32 s5, s3, 0xb0014			; GFX9-GISEL-NEXT: v_cvt_f32_f64_e32 v0, s[2:3]
	; GFX9-GISEL-NEXT: s_lshr_b32 s6, s3, 8
	; GFX9-GISEL-NEXT: s_and_b32 s7, s3, 0x1ff
	; GFX9-GISEL-NEXT: s_addk_i32 s5, 0xfc10
	; GFX9-GISEL-NEXT: s_and_b32 s6, s6, 0xffe
	; GFX9-GISEL-NEXT: s_or_b32 s2, s7, s2
	; GFX9-GISEL-NEXT: s_cmp_lg_u32 s2, 0
	; GFX9-GISEL-NEXT: s_cselect_b32 s2, 1, 0
	; GFX9-GISEL-NEXT: s_or_b32 s2, s6, s2
	; GFX9-GISEL-NEXT: s_cmp_lg_u32 s2, 0
	; GFX9-GISEL-NEXT: s_cselect_b32 s4, s4, 0x7c00
	; GFX9-GISEL-NEXT: s_sub_i32 s7, 1, s5
	; GFX9-GISEL-NEXT: s_lshl_b32 s6, s5, 12
	; GFX9-GISEL-NEXT: s_max_i32 s7, s7, 0
	; GFX9-GISEL-NEXT: s_or_b32 s6, s2, s6
	; GFX9-GISEL-NEXT: s_min_i32 s7, s7, 13
	; GFX9-GISEL-NEXT: s_bitset1_b32 s2, 12
	; GFX9-GISEL-NEXT: s_lshr_b32 s8, s2, s7
	; GFX9-GISEL-NEXT: s_lshl_b32 s7, s8, s7
	; GFX9-GISEL-NEXT: s_cmp_lg_u32 s7, s2
	; GFX9-GISEL-NEXT: s_cselect_b32 s2, 1, 0
	; GFX9-GISEL-NEXT: s_or_b32 s2, s8, s2
	; GFX9-GISEL-NEXT: s_cmp_lt_i32 s5, 1
	; GFX9-GISEL-NEXT: s_cselect_b32 s2, s2, s6
	; GFX9-GISEL-NEXT: s_and_b32 s6, s2, 7
	; GFX9-GISEL-NEXT: s_lshr_b32 s2, s2, 2
	; GFX9-GISEL-NEXT: s_cmp_eq_u32 s6, 3
	; GFX9-GISEL-NEXT: s_cselect_b32 s7, 1, 0
	; GFX9-GISEL-NEXT: s_cmp_gt_i32 s6, 5
	; GFX9-GISEL-NEXT: s_cselect_b32 s6, 1, 0
	; GFX9-GISEL-NEXT: s_or_b32 s6, s7, s6
	; GFX9-GISEL-NEXT: s_and_b32 s6, s6, 1
	; GFX9-GISEL-NEXT: s_add_i32 s2, s2, s6
	; GFX9-GISEL-NEXT: s_cmp_gt_i32 s5, 30
	; GFX9-GISEL-NEXT: s_cselect_b32 s2, 0x7c00, s2
	; GFX9-GISEL-NEXT: s_cmpk_eq_i32 s5, 0x40f
	; GFX9-GISEL-NEXT: s_cselect_b32 s2, s4, s2
	; GFX9-GISEL-NEXT: s_lshr_b32 s3, s3, 16
	; GFX9-GISEL-NEXT: s_and_b32 s3, s3, 0x8000
	; GFX9-GISEL-NEXT: s_or_b32 s2, s3, s2
	; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, s2
	; GFX9-GISEL-NEXT: s_mov_b32 s2, -1			; GFX9-GISEL-NEXT: s_mov_b32 s2, -1
	; GFX9-GISEL-NEXT: s_mov_b32 s3, 0xf000			; GFX9-GISEL-NEXT: s_mov_b32 s3, 0xf000
				; GFX9-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
	; GFX9-GISEL-NEXT: buffer_store_short v0, off, s[0:3], 0			; GFX9-GISEL-NEXT: buffer_store_short v0, off, s[0:3], 0
	; GFX9-GISEL-NEXT: s_endpgm			; GFX9-GISEL-NEXT: s_endpgm
	;			;
	; GFX11-SDAG-LABEL: fptrunc_f64_to_f16:			; GFX11-SDAG-LABEL: fptrunc_f64_to_f16:
	; GFX11-SDAG: ; %bb.0: ; %entry			; GFX11-SDAG: ; %bb.0: ; %entry
	; GFX11-SDAG-NEXT: s_load_b128 s[0:3], s[0:1], 0x24			; GFX11-SDAG-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
	; GFX11-SDAG-NEXT: s_mov_b32 s6, -1			; GFX11-SDAG-NEXT: s_mov_b32 s6, -1
	; GFX11-SDAG-NEXT: s_mov_b32 s7, 0x31016000			; GFX11-SDAG-NEXT: s_mov_b32 s7, 0x31016000
	Show All 14 Lines
	; GFX11-SDAG-NEXT: s_endpgm			; GFX11-SDAG-NEXT: s_endpgm
	;			;
	; GFX11-GISEL-LABEL: fptrunc_f64_to_f16:			; GFX11-GISEL-LABEL: fptrunc_f64_to_f16:
	; GFX11-GISEL: ; %bb.0: ; %entry			; GFX11-GISEL: ; %bb.0: ; %entry
	; GFX11-GISEL-NEXT: s_load_b128 s[0:3], s[0:1], 0x24			; GFX11-GISEL-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
	; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)			; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
	; GFX11-GISEL-NEXT: s_load_b64 s[2:3], s[2:3], 0x0			; GFX11-GISEL-NEXT: s_load_b64 s[2:3], s[2:3], 0x0
	; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)			; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
	; GFX11-GISEL-NEXT: s_and_b32 s6, s3, 0x1ff			; GFX11-GISEL-NEXT: v_cvt_f32_f64_e32 v0, s[2:3]
	; GFX11-GISEL-NEXT: s_bfe_u32 s4, s3, 0xb0014
	; GFX11-GISEL-NEXT: s_lshr_b32 s5, s3, 8
	; GFX11-GISEL-NEXT: s_or_b32 s2, s6, s2
	; GFX11-GISEL-NEXT: s_addk_i32 s4, 0xfc10
	; GFX11-GISEL-NEXT: s_and_b32 s5, s5, 0xffe
	; GFX11-GISEL-NEXT: s_cmp_lg_u32 s2, 0
	; GFX11-GISEL-NEXT: s_cselect_b32 s2, 1, 0
	; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
	; GFX11-GISEL-NEXT: s_or_b32 s2, s5, s2
	; GFX11-GISEL-NEXT: s_movk_i32 s5, 0x7e00
	; GFX11-GISEL-NEXT: s_cmp_lg_u32 s2, 0
	; GFX11-GISEL-NEXT: s_cselect_b32 s5, s5, 0x7c00
	; GFX11-GISEL-NEXT: s_sub_i32 s6, 1, s4
	; GFX11-GISEL-NEXT: s_or_b32 s7, s2, 0x1000
	; GFX11-GISEL-NEXT: s_max_i32 s6, s6, 0
	; GFX11-GISEL-NEXT: s_lshl_b32 s9, s4, 12
	; GFX11-GISEL-NEXT: s_min_i32 s6, s6, 13
	; GFX11-GISEL-NEXT: s_or_b32 s2, s2, s9
	; GFX11-GISEL-NEXT: s_lshr_b32 s8, s7, s6
	; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) \| instskip(NEXT) \| instid1(SALU_CYCLE_1)
	; GFX11-GISEL-NEXT: s_lshl_b32 s6, s8, s6
	; GFX11-GISEL-NEXT: s_cmp_lg_u32 s6, s7
	; GFX11-GISEL-NEXT: s_cselect_b32 s6, 1, 0
	; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) \| instskip(SKIP_2) \| instid1(SALU_CYCLE_1)
	; GFX11-GISEL-NEXT: s_or_b32 s6, s8, s6
	; GFX11-GISEL-NEXT: s_cmp_lt_i32 s4, 1
	; GFX11-GISEL-NEXT: s_cselect_b32 s2, s6, s2
	; GFX11-GISEL-NEXT: s_and_b32 s6, s2, 7
	; GFX11-GISEL-NEXT: s_lshr_b32 s2, s2, 2
	; GFX11-GISEL-NEXT: s_cmp_eq_u32 s6, 3
	; GFX11-GISEL-NEXT: s_cselect_b32 s7, 1, 0
	; GFX11-GISEL-NEXT: s_cmp_gt_i32 s6, 5
	; GFX11-GISEL-NEXT: s_cselect_b32 s6, 1, 0
	; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) \| instskip(NEXT) \| instid1(SALU_CYCLE_1)
	; GFX11-GISEL-NEXT: s_or_b32 s6, s7, s6
	; GFX11-GISEL-NEXT: s_and_b32 s6, s6, 1
	; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
	; GFX11-GISEL-NEXT: s_add_i32 s2, s2, s6
	; GFX11-GISEL-NEXT: s_cmp_gt_i32 s4, 30
	; GFX11-GISEL-NEXT: s_cselect_b32 s2, 0x7c00, s2
	; GFX11-GISEL-NEXT: s_cmpk_eq_i32 s4, 0x40f
	; GFX11-GISEL-NEXT: s_cselect_b32 s2, s5, s2
	; GFX11-GISEL-NEXT: s_lshr_b32 s3, s3, 16
	; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) \| instskip(NEXT) \| instid1(SALU_CYCLE_1)
	; GFX11-GISEL-NEXT: s_and_b32 s3, s3, 0x8000
	; GFX11-GISEL-NEXT: s_or_b32 s2, s3, s2
	; GFX11-GISEL-NEXT: s_mov_b32 s3, 0x31016000
	; GFX11-GISEL-NEXT: v_mov_b32_e32 v0, s2
	; GFX11-GISEL-NEXT: s_mov_b32 s2, -1			; GFX11-GISEL-NEXT: s_mov_b32 s2, -1
				; GFX11-GISEL-NEXT: s_mov_b32 s3, 0x31016000
				; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
				; GFX11-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
	; GFX11-GISEL-NEXT: buffer_store_b16 v0, off, s[0:3], 0			; GFX11-GISEL-NEXT: buffer_store_b16 v0, off, s[0:3], 0
	; GFX11-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)			; GFX11-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
	; GFX11-GISEL-NEXT: s_endpgm			; GFX11-GISEL-NEXT: s_endpgm
	ptr addrspace(1) %r,			ptr addrspace(1) %r,
	ptr addrspace(1) %a) {			ptr addrspace(1) %a) {
	entry:			entry:
	%a.val = load double, ptr addrspace(1) %a			%a.val = load double, ptr addrspace(1) %a
	%r.val = fptrunc double %a.val to half			%r.val = fptrunc double %a.val to half
	▲ Show 20 Lines • Show All 182 Lines • ▼ Show 20 Lines
	; VI-SDAG-NEXT: buffer_store_dword v0, off, s[4:7], 0			; VI-SDAG-NEXT: buffer_store_dword v0, off, s[4:7], 0
	; VI-SDAG-NEXT: s_endpgm			; VI-SDAG-NEXT: s_endpgm
	;			;
	; VI-GISEL-LABEL: fptrunc_v2f64_to_v2f16:			; VI-GISEL-LABEL: fptrunc_v2f64_to_v2f16:
	; VI-GISEL: ; %bb.0: ; %entry			; VI-GISEL: ; %bb.0: ; %entry
	; VI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24			; VI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
	; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0)			; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
	; VI-GISEL-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x0			; VI-GISEL-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x0
	; VI-GISEL-NEXT: s_movk_i32 s2, 0x7e00			; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
				; VI-GISEL-NEXT: v_cvt_f32_f64_e32 v0, s[4:5]
				; VI-GISEL-NEXT: v_cvt_f32_f64_e32 v1, s[6:7]
				; VI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
				; VI-GISEL-NEXT: v_cvt_f16_f32_sdwa v1, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
				; VI-GISEL-NEXT: v_or_b32_e32 v2, v0, v1
	; VI-GISEL-NEXT: v_mov_b32_e32 v0, s0			; VI-GISEL-NEXT: v_mov_b32_e32 v0, s0
	; VI-GISEL-NEXT: v_mov_b32_e32 v1, s1			; VI-GISEL-NEXT: v_mov_b32_e32 v1, s1
	; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
	; VI-GISEL-NEXT: s_bfe_u32 s3, s5, 0xb0014
	; VI-GISEL-NEXT: s_lshr_b32 s8, s5, 8
	; VI-GISEL-NEXT: s_and_b32 s9, s5, 0x1ff
	; VI-GISEL-NEXT: s_addk_i32 s3, 0xfc10
	; VI-GISEL-NEXT: s_and_b32 s8, s8, 0xffe
	; VI-GISEL-NEXT: s_or_b32 s4, s9, s4
	; VI-GISEL-NEXT: s_cmp_lg_u32 s4, 0
	; VI-GISEL-NEXT: s_cselect_b32 s4, 1, 0
	; VI-GISEL-NEXT: s_or_b32 s4, s8, s4
	; VI-GISEL-NEXT: s_cmp_lg_u32 s4, 0
	; VI-GISEL-NEXT: s_cselect_b32 s8, s2, 0x7c00
	; VI-GISEL-NEXT: s_sub_i32 s10, 1, s3
	; VI-GISEL-NEXT: s_lshl_b32 s9, s3, 12
	; VI-GISEL-NEXT: s_max_i32 s10, s10, 0
	; VI-GISEL-NEXT: s_or_b32 s9, s4, s9
	; VI-GISEL-NEXT: s_min_i32 s10, s10, 13
	; VI-GISEL-NEXT: s_bitset1_b32 s4, 12
	; VI-GISEL-NEXT: s_lshr_b32 s11, s4, s10
	; VI-GISEL-NEXT: s_lshl_b32 s10, s11, s10
	; VI-GISEL-NEXT: s_cmp_lg_u32 s10, s4
	; VI-GISEL-NEXT: s_cselect_b32 s4, 1, 0
	; VI-GISEL-NEXT: s_or_b32 s4, s11, s4
	; VI-GISEL-NEXT: s_cmp_lt_i32 s3, 1
	; VI-GISEL-NEXT: s_cselect_b32 s4, s4, s9
	; VI-GISEL-NEXT: s_and_b32 s9, s4, 7
	; VI-GISEL-NEXT: s_lshr_b32 s4, s4, 2
	; VI-GISEL-NEXT: s_cmp_eq_u32 s9, 3
	; VI-GISEL-NEXT: s_cselect_b32 s10, 1, 0
	; VI-GISEL-NEXT: s_cmp_gt_i32 s9, 5
	; VI-GISEL-NEXT: s_cselect_b32 s9, 1, 0
	; VI-GISEL-NEXT: s_or_b32 s9, s10, s9
	; VI-GISEL-NEXT: s_and_b32 s9, s9, 1
	; VI-GISEL-NEXT: s_add_i32 s4, s4, s9
	; VI-GISEL-NEXT: s_cmp_gt_i32 s3, 30
	; VI-GISEL-NEXT: s_cselect_b32 s4, 0x7c00, s4
	; VI-GISEL-NEXT: s_cmpk_eq_i32 s3, 0x40f
	; VI-GISEL-NEXT: s_cselect_b32 s3, s8, s4
	; VI-GISEL-NEXT: s_lshr_b32 s4, s5, 16
	; VI-GISEL-NEXT: s_and_b32 s4, s4, 0x8000
	; VI-GISEL-NEXT: s_or_b32 s3, s4, s3
	; VI-GISEL-NEXT: s_bfe_u32 s4, s7, 0xb0014
	; VI-GISEL-NEXT: s_lshr_b32 s5, s7, 8
	; VI-GISEL-NEXT: s_and_b32 s8, s7, 0x1ff
	; VI-GISEL-NEXT: s_addk_i32 s4, 0xfc10
	; VI-GISEL-NEXT: s_and_b32 s5, s5, 0xffe
	; VI-GISEL-NEXT: s_or_b32 s6, s8, s6
	; VI-GISEL-NEXT: s_cmp_lg_u32 s6, 0
	; VI-GISEL-NEXT: s_cselect_b32 s6, 1, 0
	; VI-GISEL-NEXT: s_or_b32 s5, s5, s6
	; VI-GISEL-NEXT: s_cmp_lg_u32 s5, 0
	; VI-GISEL-NEXT: s_cselect_b32 s2, s2, 0x7c00
	; VI-GISEL-NEXT: s_sub_i32 s8, 1, s4
	; VI-GISEL-NEXT: s_lshl_b32 s6, s4, 12
	; VI-GISEL-NEXT: s_max_i32 s8, s8, 0
	; VI-GISEL-NEXT: s_or_b32 s6, s5, s6
	; VI-GISEL-NEXT: s_min_i32 s8, s8, 13
	; VI-GISEL-NEXT: s_bitset1_b32 s5, 12
	; VI-GISEL-NEXT: s_lshr_b32 s9, s5, s8
	; VI-GISEL-NEXT: s_lshl_b32 s8, s9, s8
	; VI-GISEL-NEXT: s_cmp_lg_u32 s8, s5
	; VI-GISEL-NEXT: s_cselect_b32 s5, 1, 0
	; VI-GISEL-NEXT: s_or_b32 s5, s9, s5
	; VI-GISEL-NEXT: s_cmp_lt_i32 s4, 1
	; VI-GISEL-NEXT: s_cselect_b32 s5, s5, s6
	; VI-GISEL-NEXT: s_and_b32 s6, s5, 7
	; VI-GISEL-NEXT: s_lshr_b32 s5, s5, 2
	; VI-GISEL-NEXT: s_cmp_eq_u32 s6, 3
	; VI-GISEL-NEXT: s_cselect_b32 s8, 1, 0
	; VI-GISEL-NEXT: s_cmp_gt_i32 s6, 5
	; VI-GISEL-NEXT: s_cselect_b32 s6, 1, 0
	; VI-GISEL-NEXT: s_or_b32 s6, s8, s6
	; VI-GISEL-NEXT: s_and_b32 s6, s6, 1
	; VI-GISEL-NEXT: s_add_i32 s5, s5, s6
	; VI-GISEL-NEXT: s_cmp_gt_i32 s4, 30
	; VI-GISEL-NEXT: s_cselect_b32 s5, 0x7c00, s5
	; VI-GISEL-NEXT: s_cmpk_eq_i32 s4, 0x40f
	; VI-GISEL-NEXT: s_cselect_b32 s2, s2, s5
	; VI-GISEL-NEXT: s_lshr_b32 s4, s7, 16
	; VI-GISEL-NEXT: s_and_b32 s4, s4, 0x8000
	; VI-GISEL-NEXT: s_or_b32 s2, s4, s2
	; VI-GISEL-NEXT: s_and_b32 s2, s2, 0xffff
	; VI-GISEL-NEXT: s_and_b32 s3, s3, 0xffff
	; VI-GISEL-NEXT: s_lshl_b32 s2, s2, 16
	; VI-GISEL-NEXT: s_or_b32 s2, s3, s2
	; VI-GISEL-NEXT: v_mov_b32_e32 v2, s2
	; VI-GISEL-NEXT: flat_store_dword v[0:1], v2			; VI-GISEL-NEXT: flat_store_dword v[0:1], v2
	; VI-GISEL-NEXT: s_endpgm			; VI-GISEL-NEXT: s_endpgm
	;			;
	; GFX9-SDAG-LABEL: fptrunc_v2f64_to_v2f16:			; GFX9-SDAG-LABEL: fptrunc_v2f64_to_v2f16:
	; GFX9-SDAG: ; %bb.0: ; %entry			; GFX9-SDAG: ; %bb.0: ; %entry
	; GFX9-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24			; GFX9-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
	; GFX9-SDAG-NEXT: s_mov_b32 s7, 0xf000			; GFX9-SDAG-NEXT: s_mov_b32 s7, 0xf000
	; GFX9-SDAG-NEXT: s_mov_b32 s6, -1			; GFX9-SDAG-NEXT: s_mov_b32 s6, -1
	Show All 12 Lines
	; GFX9-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0			; GFX9-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
	; GFX9-SDAG-NEXT: v_lshl_or_b32 v0, v1, 16, v0			; GFX9-SDAG-NEXT: v_lshl_or_b32 v0, v1, 16, v0
	; GFX9-SDAG-NEXT: buffer_store_dword v0, off, s[4:7], 0			; GFX9-SDAG-NEXT: buffer_store_dword v0, off, s[4:7], 0
	; GFX9-SDAG-NEXT: s_endpgm			; GFX9-SDAG-NEXT: s_endpgm
	;			;
	; GFX9-GISEL-LABEL: fptrunc_v2f64_to_v2f16:			; GFX9-GISEL-LABEL: fptrunc_v2f64_to_v2f16:
	; GFX9-GISEL: ; %bb.0: ; %entry			; GFX9-GISEL: ; %bb.0: ; %entry
	; GFX9-GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24			; GFX9-GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
	; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, 0
	; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0)			; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0)
	; GFX9-GISEL-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x0			; GFX9-GISEL-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x0
	; GFX9-GISEL-NEXT: s_movk_i32 s2, 0x7e00
	; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0)			; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0)
	; GFX9-GISEL-NEXT: s_bfe_u32 s3, s5, 0xb0014			; GFX9-GISEL-NEXT: v_cvt_f32_f64_e32 v0, s[4:5]
	; GFX9-GISEL-NEXT: s_lshr_b32 s8, s5, 8			; GFX9-GISEL-NEXT: v_cvt_f32_f64_e32 v1, s[6:7]
	; GFX9-GISEL-NEXT: s_and_b32 s9, s5, 0x1ff			; GFX9-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
	; GFX9-GISEL-NEXT: s_addk_i32 s3, 0xfc10			; GFX9-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1
	; GFX9-GISEL-NEXT: s_and_b32 s8, s8, 0xffe			; GFX9-GISEL-NEXT: v_pack_b32_f16 v0, v0, v1
	; GFX9-GISEL-NEXT: s_or_b32 s4, s9, s4			; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, 0
	; GFX9-GISEL-NEXT: s_cmp_lg_u32 s4, 0
	; GFX9-GISEL-NEXT: s_cselect_b32 s4, 1, 0
	; GFX9-GISEL-NEXT: s_or_b32 s4, s8, s4
	; GFX9-GISEL-NEXT: s_cmp_lg_u32 s4, 0
	; GFX9-GISEL-NEXT: s_cselect_b32 s8, s2, 0x7c00
	; GFX9-GISEL-NEXT: s_sub_i32 s10, 1, s3
	; GFX9-GISEL-NEXT: s_lshl_b32 s9, s3, 12
	; GFX9-GISEL-NEXT: s_max_i32 s10, s10, 0
	; GFX9-GISEL-NEXT: s_or_b32 s9, s4, s9
	; GFX9-GISEL-NEXT: s_min_i32 s10, s10, 13
	; GFX9-GISEL-NEXT: s_bitset1_b32 s4, 12
	; GFX9-GISEL-NEXT: s_lshr_b32 s11, s4, s10
	; GFX9-GISEL-NEXT: s_lshl_b32 s10, s11, s10
	; GFX9-GISEL-NEXT: s_cmp_lg_u32 s10, s4
	; GFX9-GISEL-NEXT: s_cselect_b32 s4, 1, 0
	; GFX9-GISEL-NEXT: s_or_b32 s4, s11, s4
	; GFX9-GISEL-NEXT: s_cmp_lt_i32 s3, 1
	; GFX9-GISEL-NEXT: s_cselect_b32 s4, s4, s9
	; GFX9-GISEL-NEXT: s_and_b32 s9, s4, 7
	; GFX9-GISEL-NEXT: s_lshr_b32 s4, s4, 2
	; GFX9-GISEL-NEXT: s_cmp_eq_u32 s9, 3
	; GFX9-GISEL-NEXT: s_cselect_b32 s10, 1, 0
	; GFX9-GISEL-NEXT: s_cmp_gt_i32 s9, 5
	; GFX9-GISEL-NEXT: s_cselect_b32 s9, 1, 0
	; GFX9-GISEL-NEXT: s_or_b32 s9, s10, s9
	; GFX9-GISEL-NEXT: s_and_b32 s9, s9, 1
	; GFX9-GISEL-NEXT: s_add_i32 s4, s4, s9
	; GFX9-GISEL-NEXT: s_cmp_gt_i32 s3, 30
	; GFX9-GISEL-NEXT: s_cselect_b32 s4, 0x7c00, s4
	; GFX9-GISEL-NEXT: s_cmpk_eq_i32 s3, 0x40f
	; GFX9-GISEL-NEXT: s_cselect_b32 s3, s8, s4
	; GFX9-GISEL-NEXT: s_lshr_b32 s4, s5, 16
	; GFX9-GISEL-NEXT: s_and_b32 s4, s4, 0x8000
	; GFX9-GISEL-NEXT: s_or_b32 s3, s4, s3
	; GFX9-GISEL-NEXT: s_bfe_u32 s4, s7, 0xb0014
	; GFX9-GISEL-NEXT: s_lshr_b32 s5, s7, 8
	; GFX9-GISEL-NEXT: s_and_b32 s8, s7, 0x1ff
	; GFX9-GISEL-NEXT: s_addk_i32 s4, 0xfc10
	; GFX9-GISEL-NEXT: s_and_b32 s5, s5, 0xffe
	; GFX9-GISEL-NEXT: s_or_b32 s6, s8, s6
	; GFX9-GISEL-NEXT: s_cmp_lg_u32 s6, 0
	; GFX9-GISEL-NEXT: s_cselect_b32 s6, 1, 0
	; GFX9-GISEL-NEXT: s_or_b32 s5, s5, s6
	; GFX9-GISEL-NEXT: s_cmp_lg_u32 s5, 0
	; GFX9-GISEL-NEXT: s_cselect_b32 s2, s2, 0x7c00
	; GFX9-GISEL-NEXT: s_sub_i32 s8, 1, s4
	; GFX9-GISEL-NEXT: s_lshl_b32 s6, s4, 12
	; GFX9-GISEL-NEXT: s_max_i32 s8, s8, 0
	; GFX9-GISEL-NEXT: s_or_b32 s6, s5, s6
	; GFX9-GISEL-NEXT: s_min_i32 s8, s8, 13
	; GFX9-GISEL-NEXT: s_bitset1_b32 s5, 12
	; GFX9-GISEL-NEXT: s_lshr_b32 s9, s5, s8
	; GFX9-GISEL-NEXT: s_lshl_b32 s8, s9, s8
	; GFX9-GISEL-NEXT: s_cmp_lg_u32 s8, s5
	; GFX9-GISEL-NEXT: s_cselect_b32 s5, 1, 0
	; GFX9-GISEL-NEXT: s_or_b32 s5, s9, s5
	; GFX9-GISEL-NEXT: s_cmp_lt_i32 s4, 1
	; GFX9-GISEL-NEXT: s_cselect_b32 s5, s5, s6
	; GFX9-GISEL-NEXT: s_and_b32 s6, s5, 7
	; GFX9-GISEL-NEXT: s_lshr_b32 s5, s5, 2
	; GFX9-GISEL-NEXT: s_cmp_eq_u32 s6, 3
	; GFX9-GISEL-NEXT: s_cselect_b32 s8, 1, 0
	; GFX9-GISEL-NEXT: s_cmp_gt_i32 s6, 5
	; GFX9-GISEL-NEXT: s_cselect_b32 s6, 1, 0
	; GFX9-GISEL-NEXT: s_or_b32 s6, s8, s6
	; GFX9-GISEL-NEXT: s_and_b32 s6, s6, 1
	; GFX9-GISEL-NEXT: s_add_i32 s5, s5, s6
	; GFX9-GISEL-NEXT: s_cmp_gt_i32 s4, 30
	; GFX9-GISEL-NEXT: s_cselect_b32 s5, 0x7c00, s5
	; GFX9-GISEL-NEXT: s_cmpk_eq_i32 s4, 0x40f
	; GFX9-GISEL-NEXT: s_cselect_b32 s2, s2, s5
	; GFX9-GISEL-NEXT: s_lshr_b32 s4, s7, 16
	; GFX9-GISEL-NEXT: s_and_b32 s4, s4, 0x8000
	; GFX9-GISEL-NEXT: s_or_b32 s2, s4, s2
	; GFX9-GISEL-NEXT: s_pack_ll_b32_b16 s2, s3, s2
	; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, s2
	; GFX9-GISEL-NEXT: global_store_dword v1, v0, s[0:1]			; GFX9-GISEL-NEXT: global_store_dword v1, v0, s[0:1]
	; GFX9-GISEL-NEXT: s_endpgm			; GFX9-GISEL-NEXT: s_endpgm
	;			;
	; GFX11-SDAG-LABEL: fptrunc_v2f64_to_v2f16:			; GFX11-SDAG-LABEL: fptrunc_v2f64_to_v2f16:
	; GFX11-SDAG: ; %bb.0: ; %entry			; GFX11-SDAG: ; %bb.0: ; %entry
	; GFX11-SDAG-NEXT: s_load_b128 s[0:3], s[0:1], 0x24			; GFX11-SDAG-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
	; GFX11-SDAG-NEXT: s_mov_b32 s6, -1			; GFX11-SDAG-NEXT: s_mov_b32 s6, -1
	; GFX11-SDAG-NEXT: s_mov_b32 s7, 0x31016000			; GFX11-SDAG-NEXT: s_mov_b32 s7, 0x31016000
	Show All 16 Lines
	; GFX11-SDAG-NEXT: v_lshl_or_b32 v0, v1, 16, v0			; GFX11-SDAG-NEXT: v_lshl_or_b32 v0, v1, 16, v0
	; GFX11-SDAG-NEXT: buffer_store_b32 v0, off, s[4:7], 0			; GFX11-SDAG-NEXT: buffer_store_b32 v0, off, s[4:7], 0
	; GFX11-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)			; GFX11-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
	; GFX11-SDAG-NEXT: s_endpgm			; GFX11-SDAG-NEXT: s_endpgm
	;			;
	; GFX11-GISEL-LABEL: fptrunc_v2f64_to_v2f16:			; GFX11-GISEL-LABEL: fptrunc_v2f64_to_v2f16:
	; GFX11-GISEL: ; %bb.0: ; %entry			; GFX11-GISEL: ; %bb.0: ; %entry
	; GFX11-GISEL-NEXT: s_load_b128 s[0:3], s[0:1], 0x24			; GFX11-GISEL-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
	; GFX11-GISEL-NEXT: v_mov_b32_e32 v1, 0
	; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)			; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
	; GFX11-GISEL-NEXT: s_load_b128 s[4:7], s[2:3], 0x0			; GFX11-GISEL-NEXT: s_load_b128 s[4:7], s[2:3], 0x0
	; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)			; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
	; GFX11-GISEL-NEXT: s_and_b32 s8, s5, 0x1ff			; GFX11-GISEL-NEXT: v_cvt_f32_f64_e32 v0, s[4:5]
	; GFX11-GISEL-NEXT: s_bfe_u32 s2, s5, 0xb0014			; GFX11-GISEL-NEXT: v_cvt_f32_f64_e32 v1, s[6:7]
	; GFX11-GISEL-NEXT: s_lshr_b32 s3, s5, 8			; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) \| instskip(NEXT) \| instid1(VALU_DEP_2)
	; GFX11-GISEL-NEXT: s_or_b32 s4, s8, s4			; GFX11-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
	; GFX11-GISEL-NEXT: s_addk_i32 s2, 0xfc10			; GFX11-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1
	; GFX11-GISEL-NEXT: s_and_b32 s3, s3, 0xffe			; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
	; GFX11-GISEL-NEXT: s_cmp_lg_u32 s4, 0			; GFX11-GISEL-NEXT: v_pack_b32_f16 v0, v0, v1
	; GFX11-GISEL-NEXT: s_cselect_b32 s4, 1, 0			; GFX11-GISEL-NEXT: v_mov_b32_e32 v1, 0
	; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
	; GFX11-GISEL-NEXT: s_or_b32 s3, s3, s4
	; GFX11-GISEL-NEXT: s_movk_i32 s4, 0x7e00
	; GFX11-GISEL-NEXT: s_cmp_lg_u32 s3, 0
	; GFX11-GISEL-NEXT: s_cselect_b32 s8, s4, 0x7c00
	; GFX11-GISEL-NEXT: s_sub_i32 s9, 1, s2
	; GFX11-GISEL-NEXT: s_or_b32 s10, s3, 0x1000
	; GFX11-GISEL-NEXT: s_max_i32 s9, s9, 0
	; GFX11-GISEL-NEXT: s_lshl_b32 s12, s2, 12
	; GFX11-GISEL-NEXT: s_min_i32 s9, s9, 13
	; GFX11-GISEL-NEXT: s_or_b32 s3, s3, s12
	; GFX11-GISEL-NEXT: s_lshr_b32 s11, s10, s9
	; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) \| instskip(NEXT) \| instid1(SALU_CYCLE_1)
	; GFX11-GISEL-NEXT: s_lshl_b32 s9, s11, s9
	; GFX11-GISEL-NEXT: s_cmp_lg_u32 s9, s10
	; GFX11-GISEL-NEXT: s_cselect_b32 s9, 1, 0
	; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) \| instskip(SKIP_2) \| instid1(SALU_CYCLE_1)
	; GFX11-GISEL-NEXT: s_or_b32 s9, s11, s9
	; GFX11-GISEL-NEXT: s_cmp_lt_i32 s2, 1
	; GFX11-GISEL-NEXT: s_cselect_b32 s3, s9, s3
	; GFX11-GISEL-NEXT: s_and_b32 s9, s3, 7
	; GFX11-GISEL-NEXT: s_lshr_b32 s3, s3, 2
	; GFX11-GISEL-NEXT: s_cmp_eq_u32 s9, 3
	; GFX11-GISEL-NEXT: s_cselect_b32 s10, 1, 0
	; GFX11-GISEL-NEXT: s_cmp_gt_i32 s9, 5
	; GFX11-GISEL-NEXT: s_cselect_b32 s9, 1, 0
	; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) \| instskip(NEXT) \| instid1(SALU_CYCLE_1)
	; GFX11-GISEL-NEXT: s_or_b32 s9, s10, s9
	; GFX11-GISEL-NEXT: s_and_b32 s9, s9, 1
	; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
	; GFX11-GISEL-NEXT: s_add_i32 s3, s3, s9
	; GFX11-GISEL-NEXT: s_cmp_gt_i32 s2, 30
	; GFX11-GISEL-NEXT: s_cselect_b32 s3, 0x7c00, s3
	; GFX11-GISEL-NEXT: s_cmpk_eq_i32 s2, 0x40f
	; GFX11-GISEL-NEXT: s_cselect_b32 s2, s8, s3
	; GFX11-GISEL-NEXT: s_lshr_b32 s3, s5, 16
	; GFX11-GISEL-NEXT: s_and_b32 s9, s7, 0x1ff
	; GFX11-GISEL-NEXT: s_bfe_u32 s5, s7, 0xb0014
	; GFX11-GISEL-NEXT: s_lshr_b32 s8, s7, 8
	; GFX11-GISEL-NEXT: s_and_b32 s3, s3, 0x8000
	; GFX11-GISEL-NEXT: s_or_b32 s6, s9, s6
	; GFX11-GISEL-NEXT: s_addk_i32 s5, 0xfc10
	; GFX11-GISEL-NEXT: s_and_b32 s8, s8, 0xffe
	; GFX11-GISEL-NEXT: s_or_b32 s2, s3, s2
	; GFX11-GISEL-NEXT: s_cmp_lg_u32 s6, 0
	; GFX11-GISEL-NEXT: s_cselect_b32 s3, 1, 0
	; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) \| instskip(NEXT) \| instid1(SALU_CYCLE_1)
	; GFX11-GISEL-NEXT: s_or_b32 s3, s8, s3
	; GFX11-GISEL-NEXT: s_cmp_lg_u32 s3, 0
	; GFX11-GISEL-NEXT: s_cselect_b32 s4, s4, 0x7c00
	; GFX11-GISEL-NEXT: s_sub_i32 s6, 1, s5
	; GFX11-GISEL-NEXT: s_or_b32 s8, s3, 0x1000
	; GFX11-GISEL-NEXT: s_max_i32 s6, s6, 0
	; GFX11-GISEL-NEXT: s_lshl_b32 s10, s5, 12
	; GFX11-GISEL-NEXT: s_min_i32 s6, s6, 13
	; GFX11-GISEL-NEXT: s_or_b32 s3, s3, s10
	; GFX11-GISEL-NEXT: s_lshr_b32 s9, s8, s6
	; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) \| instskip(NEXT) \| instid1(SALU_CYCLE_1)
	; GFX11-GISEL-NEXT: s_lshl_b32 s6, s9, s6
	; GFX11-GISEL-NEXT: s_cmp_lg_u32 s6, s8
	; GFX11-GISEL-NEXT: s_cselect_b32 s6, 1, 0
	; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) \| instskip(SKIP_2) \| instid1(SALU_CYCLE_1)
	; GFX11-GISEL-NEXT: s_or_b32 s6, s9, s6
	; GFX11-GISEL-NEXT: s_cmp_lt_i32 s5, 1
	; GFX11-GISEL-NEXT: s_cselect_b32 s3, s6, s3
	; GFX11-GISEL-NEXT: s_and_b32 s6, s3, 7
	; GFX11-GISEL-NEXT: s_lshr_b32 s3, s3, 2
	; GFX11-GISEL-NEXT: s_cmp_eq_u32 s6, 3
	; GFX11-GISEL-NEXT: s_cselect_b32 s8, 1, 0
	; GFX11-GISEL-NEXT: s_cmp_gt_i32 s6, 5
	; GFX11-GISEL-NEXT: s_cselect_b32 s6, 1, 0
	; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) \| instskip(NEXT) \| instid1(SALU_CYCLE_1)
	; GFX11-GISEL-NEXT: s_or_b32 s6, s8, s6
	; GFX11-GISEL-NEXT: s_and_b32 s6, s6, 1
	; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
	; GFX11-GISEL-NEXT: s_add_i32 s3, s3, s6
	; GFX11-GISEL-NEXT: s_cmp_gt_i32 s5, 30
	; GFX11-GISEL-NEXT: s_cselect_b32 s3, 0x7c00, s3
	; GFX11-GISEL-NEXT: s_cmpk_eq_i32 s5, 0x40f
	; GFX11-GISEL-NEXT: s_cselect_b32 s3, s4, s3
	; GFX11-GISEL-NEXT: s_lshr_b32 s4, s7, 16
	; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) \| instskip(NEXT) \| instid1(SALU_CYCLE_1)
	; GFX11-GISEL-NEXT: s_and_b32 s4, s4, 0x8000
	; GFX11-GISEL-NEXT: s_or_b32 s3, s4, s3
	; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) \| instskip(NEXT) \| instid1(SALU_CYCLE_1)
	; GFX11-GISEL-NEXT: s_pack_ll_b32_b16 s2, s2, s3
	; GFX11-GISEL-NEXT: v_mov_b32_e32 v0, s2
	; GFX11-GISEL-NEXT: global_store_b32 v1, v0, s[0:1]			; GFX11-GISEL-NEXT: global_store_b32 v1, v0, s[0:1]
	; GFX11-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)			; GFX11-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
	; GFX11-GISEL-NEXT: s_endpgm			; GFX11-GISEL-NEXT: s_endpgm
	ptr addrspace(1) %r,			ptr addrspace(1) %r,
	ptr addrspace(1) %a) {			ptr addrspace(1) %a) {
	entry:			entry:
	%a.val = load <2 x double>, ptr addrspace(1) %a			%a.val = load <2 x double>, ptr addrspace(1) %a
	%r.val = fptrunc <2 x double> %a.val to <2 x half>			%r.val = fptrunc <2 x double> %a.val to <2 x half>
	▲ Show 20 Lines • Show All 750 Lines • Show Last 20 Lines

llvm/test/CodeGen/AMDGPU/fptrunc.ll

	; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2			; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
	; RUN: llc -march=amdgcn -verify-machineinstrs < %s \| FileCheck -check-prefixes=SI %s			; RUN: llc -march=amdgcn -verify-machineinstrs < %s \| FileCheck -check-prefixes=SI %s
	; RUN: llc -march=amdgcn -mcpu=tonga -global-isel=0 -mattr=-flat-for-global -verify-machineinstrs < %s \| FileCheck -check-prefixes=VI-SDAG,VI-SAFE-SDAG %s			; RUN: llc -march=amdgcn -mcpu=tonga -global-isel=0 -mattr=-flat-for-global -verify-machineinstrs < %s \| FileCheck -check-prefixes=VI-SDAG,VI-SAFE-SDAG %s
	; RUN: llc -march=amdgcn -mcpu=tonga -global-isel=1 -mattr=-flat-for-global -verify-machineinstrs < %s \| FileCheck -check-prefixes=VI-GISEL %s			; RUN: llc -march=amdgcn -mcpu=tonga -global-isel=1 -mattr=-flat-for-global -verify-machineinstrs < %s \| FileCheck -check-prefixes=VI-GISEL,VI-SAFE-GISEL %s
	; RUN: llc -march=amdgcn -mcpu=tonga -global-isel=0 -mattr=-flat-for-global -enable-unsafe-fp-math -verify-machineinstrs < %s \| FileCheck -check-prefixes=VI-SDAG,VI-UNSAFE-SDAG %s			; RUN: llc -march=amdgcn -mcpu=tonga -global-isel=0 -mattr=-flat-for-global -enable-unsafe-fp-math -verify-machineinstrs < %s \| FileCheck -check-prefixes=VI-SDAG,VI-UNSAFE-SDAG %s
	; RUN: llc -march=amdgcn -mcpu=tonga -global-isel=1 -mattr=-flat-for-global -enable-unsafe-fp-math -verify-machineinstrs < %s \| FileCheck -check-prefixes=VI-GISEL %s			; RUN: llc -march=amdgcn -mcpu=tonga -global-isel=1 -mattr=-flat-for-global -enable-unsafe-fp-math -verify-machineinstrs < %s \| FileCheck -check-prefixes=VI-GISEL,VI-UNSAFE-GISEL %s
	; RUN: llc -march=amdgcn -mcpu=gfx1030 -global-isel=0 -mattr=-flat-for-global -verify-machineinstrs < %s \| FileCheck -check-prefixes=GFX10-SDAG,GFX10-SAFE-SDAG %s			; RUN: llc -march=amdgcn -mcpu=gfx1030 -global-isel=0 -mattr=-flat-for-global -verify-machineinstrs < %s \| FileCheck -check-prefixes=GFX10-SDAG,GFX10-SAFE-SDAG %s
	; RUN: llc -march=amdgcn -mcpu=gfx1030 -global-isel=1 -mattr=-flat-for-global -verify-machineinstrs < %s \| FileCheck -check-prefixes=GFX10-GISEL %s			; RUN: llc -march=amdgcn -mcpu=gfx1030 -global-isel=1 -mattr=-flat-for-global -verify-machineinstrs < %s \| FileCheck -check-prefixes=GFX10-GISEL,GFX10-SAFE-GISEL %s
	; RUN: llc -march=amdgcn -mcpu=gfx1030 -global-isel=0 -mattr=-flat-for-global -enable-unsafe-fp-math -verify-machineinstrs < %s \| FileCheck -check-prefixes=GFX10-SDAG,GFX10-UNSAFE-SDAG %s			; RUN: llc -march=amdgcn -mcpu=gfx1030 -global-isel=0 -mattr=-flat-for-global -enable-unsafe-fp-math -verify-machineinstrs < %s \| FileCheck -check-prefixes=GFX10-SDAG,GFX10-UNSAFE-SDAG %s
	; RUN: llc -march=amdgcn -mcpu=gfx1030 -global-isel=1 -mattr=-flat-for-global -enable-unsafe-fp-math -verify-machineinstrs < %s \| FileCheck -check-prefixes=GFX10-GISEL %s			; RUN: llc -march=amdgcn -mcpu=gfx1030 -global-isel=1 -mattr=-flat-for-global -enable-unsafe-fp-math -verify-machineinstrs < %s \| FileCheck -check-prefixes=GFX10-GISEL,GFX10-UNSAFE-GISEL %s
	; RUN: llc -march=amdgcn -mcpu=gfx1100 -global-isel=0 -mattr=-flat-for-global -verify-machineinstrs < %s \| FileCheck -check-prefixes=GFX11-SDAG,GFX11-SAFE-SDAG %s			; RUN: llc -march=amdgcn -mcpu=gfx1100 -global-isel=0 -mattr=-flat-for-global -verify-machineinstrs < %s \| FileCheck -check-prefixes=GFX11-SDAG,GFX11-SAFE-SDAG %s
	; RUN: llc -march=amdgcn -mcpu=gfx1100 -global-isel=1 -mattr=-flat-for-global -verify-machineinstrs < %s \| FileCheck -check-prefixes=GFX11-GISEL %s			; RUN: llc -march=amdgcn -mcpu=gfx1100 -global-isel=1 -mattr=-flat-for-global -verify-machineinstrs < %s \| FileCheck -check-prefixes=GFX11-GISEL,GFX11-SAFE-GISEL %s
	; RUN: llc -march=amdgcn -mcpu=gfx1100 -global-isel=0 -mattr=-flat-for-global -enable-unsafe-fp-math -verify-machineinstrs < %s \| FileCheck -check-prefixes=GFX11-SDAG,GFX11-UNSAFE-SDAG %s			; RUN: llc -march=amdgcn -mcpu=gfx1100 -global-isel=0 -mattr=-flat-for-global -enable-unsafe-fp-math -verify-machineinstrs < %s \| FileCheck -check-prefixes=GFX11-SDAG,GFX11-UNSAFE-SDAG %s
	; RUN: llc -march=amdgcn -mcpu=gfx1100 -global-isel=1 -mattr=-flat-for-global -enable-unsafe-fp-math -verify-machineinstrs < %s \| FileCheck -check-prefixes=GFX11-GISEL %s			; RUN: llc -march=amdgcn -mcpu=gfx1100 -global-isel=1 -mattr=-flat-for-global -enable-unsafe-fp-math -verify-machineinstrs < %s \| FileCheck -check-prefixes=GFX11-GISEL,GFX11-UNSAFE-GISEL %s

	define amdgpu_kernel void @fptrunc_f64_to_f32(ptr addrspace(1) %out, double %in) {			define amdgpu_kernel void @fptrunc_f64_to_f32(ptr addrspace(1) %out, double %in) {
	; SI-LABEL: fptrunc_f64_to_f32:			; SI-LABEL: fptrunc_f64_to_f32:
	; SI: ; %bb.0:			; SI: ; %bb.0:
	; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9			; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
	; SI-NEXT: s_mov_b32 s7, 0xf000			; SI-NEXT: s_mov_b32 s7, 0xf000
	; SI-NEXT: s_mov_b32 s6, -1			; SI-NEXT: s_mov_b32 s6, -1
	; SI-NEXT: s_waitcnt lgkmcnt(0)			; SI-NEXT: s_waitcnt lgkmcnt(0)
	▲ Show 20 Lines • Show All 186 Lines • ▼ Show 20 Lines
	; VI-SAFE-SDAG-NEXT: s_cselect_b64 vcc, -1, 0			; VI-SAFE-SDAG-NEXT: s_cselect_b64 vcc, -1, 0
	; VI-SAFE-SDAG-NEXT: s_lshr_b32 s4, s7, 16			; VI-SAFE-SDAG-NEXT: s_lshr_b32 s4, s7, 16
	; VI-SAFE-SDAG-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc			; VI-SAFE-SDAG-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
	; VI-SAFE-SDAG-NEXT: s_and_b32 s4, s4, 0x8000			; VI-SAFE-SDAG-NEXT: s_and_b32 s4, s4, 0x8000
	; VI-SAFE-SDAG-NEXT: v_or_b32_e32 v0, s4, v0			; VI-SAFE-SDAG-NEXT: v_or_b32_e32 v0, s4, v0
	; VI-SAFE-SDAG-NEXT: buffer_store_short v0, off, s[0:3], 0			; VI-SAFE-SDAG-NEXT: buffer_store_short v0, off, s[0:3], 0
	; VI-SAFE-SDAG-NEXT: s_endpgm			; VI-SAFE-SDAG-NEXT: s_endpgm
	;			;
	; VI-GISEL-LABEL: fptrunc_f64_to_f16:			; VI-SAFE-GISEL-LABEL: fptrunc_f64_to_f16:
	; VI-GISEL: ; %bb.0:			; VI-SAFE-GISEL: ; %bb.0:
	; VI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24			; VI-SAFE-GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
	; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0)			; VI-SAFE-GISEL-NEXT: s_waitcnt lgkmcnt(0)
	; VI-GISEL-NEXT: s_bfe_u32 s4, s3, 0xb0014			; VI-SAFE-GISEL-NEXT: s_bfe_u32 s4, s3, 0xb0014
	; VI-GISEL-NEXT: s_lshr_b32 s5, s3, 8			; VI-SAFE-GISEL-NEXT: s_lshr_b32 s5, s3, 8
	; VI-GISEL-NEXT: s_and_b32 s6, s3, 0x1ff			; VI-SAFE-GISEL-NEXT: s_and_b32 s6, s3, 0x1ff
	; VI-GISEL-NEXT: s_addk_i32 s4, 0xfc10			; VI-SAFE-GISEL-NEXT: s_addk_i32 s4, 0xfc10
	; VI-GISEL-NEXT: s_and_b32 s5, s5, 0xffe			; VI-SAFE-GISEL-NEXT: s_and_b32 s5, s5, 0xffe
	; VI-GISEL-NEXT: s_or_b32 s2, s6, s2			; VI-SAFE-GISEL-NEXT: s_or_b32 s2, s6, s2
	; VI-GISEL-NEXT: s_cmp_lg_u32 s2, 0			; VI-SAFE-GISEL-NEXT: s_cmp_lg_u32 s2, 0
	; VI-GISEL-NEXT: s_cselect_b32 s2, 1, 0			; VI-SAFE-GISEL-NEXT: s_cselect_b32 s2, 1, 0
	; VI-GISEL-NEXT: s_or_b32 s2, s5, s2			; VI-SAFE-GISEL-NEXT: s_or_b32 s2, s5, s2
	; VI-GISEL-NEXT: s_cmp_lg_u32 s2, 0			; VI-SAFE-GISEL-NEXT: s_cmp_lg_u32 s2, 0
	; VI-GISEL-NEXT: s_movk_i32 s5, 0x7e00			; VI-SAFE-GISEL-NEXT: s_movk_i32 s5, 0x7e00
	; VI-GISEL-NEXT: s_cselect_b32 s5, s5, 0x7c00			; VI-SAFE-GISEL-NEXT: s_cselect_b32 s5, s5, 0x7c00
	; VI-GISEL-NEXT: s_sub_i32 s7, 1, s4			; VI-SAFE-GISEL-NEXT: s_sub_i32 s7, 1, s4
	; VI-GISEL-NEXT: s_lshl_b32 s6, s4, 12			; VI-SAFE-GISEL-NEXT: s_lshl_b32 s6, s4, 12
	; VI-GISEL-NEXT: s_max_i32 s7, s7, 0			; VI-SAFE-GISEL-NEXT: s_max_i32 s7, s7, 0
	; VI-GISEL-NEXT: s_or_b32 s6, s2, s6			; VI-SAFE-GISEL-NEXT: s_or_b32 s6, s2, s6
	; VI-GISEL-NEXT: s_min_i32 s7, s7, 13			; VI-SAFE-GISEL-NEXT: s_min_i32 s7, s7, 13
	; VI-GISEL-NEXT: s_bitset1_b32 s2, 12			; VI-SAFE-GISEL-NEXT: s_bitset1_b32 s2, 12
	; VI-GISEL-NEXT: s_lshr_b32 s8, s2, s7			; VI-SAFE-GISEL-NEXT: s_lshr_b32 s8, s2, s7
	; VI-GISEL-NEXT: s_lshl_b32 s7, s8, s7			; VI-SAFE-GISEL-NEXT: s_lshl_b32 s7, s8, s7
	; VI-GISEL-NEXT: s_cmp_lg_u32 s7, s2			; VI-SAFE-GISEL-NEXT: s_cmp_lg_u32 s7, s2
	; VI-GISEL-NEXT: s_cselect_b32 s2, 1, 0			; VI-SAFE-GISEL-NEXT: s_cselect_b32 s2, 1, 0
	; VI-GISEL-NEXT: s_or_b32 s2, s8, s2			; VI-SAFE-GISEL-NEXT: s_or_b32 s2, s8, s2
	; VI-GISEL-NEXT: s_cmp_lt_i32 s4, 1			; VI-SAFE-GISEL-NEXT: s_cmp_lt_i32 s4, 1
	; VI-GISEL-NEXT: s_cselect_b32 s2, s2, s6			; VI-SAFE-GISEL-NEXT: s_cselect_b32 s2, s2, s6
	; VI-GISEL-NEXT: s_and_b32 s6, s2, 7			; VI-SAFE-GISEL-NEXT: s_and_b32 s6, s2, 7
	; VI-GISEL-NEXT: s_lshr_b32 s2, s2, 2			; VI-SAFE-GISEL-NEXT: s_lshr_b32 s2, s2, 2
	; VI-GISEL-NEXT: s_cmp_eq_u32 s6, 3			; VI-SAFE-GISEL-NEXT: s_cmp_eq_u32 s6, 3
	; VI-GISEL-NEXT: s_cselect_b32 s7, 1, 0			; VI-SAFE-GISEL-NEXT: s_cselect_b32 s7, 1, 0
	; VI-GISEL-NEXT: s_cmp_gt_i32 s6, 5			; VI-SAFE-GISEL-NEXT: s_cmp_gt_i32 s6, 5
	; VI-GISEL-NEXT: s_cselect_b32 s6, 1, 0			; VI-SAFE-GISEL-NEXT: s_cselect_b32 s6, 1, 0
	; VI-GISEL-NEXT: s_or_b32 s6, s7, s6			; VI-SAFE-GISEL-NEXT: s_or_b32 s6, s7, s6
	; VI-GISEL-NEXT: s_and_b32 s6, s6, 1			; VI-SAFE-GISEL-NEXT: s_and_b32 s6, s6, 1
	; VI-GISEL-NEXT: s_add_i32 s2, s2, s6			; VI-SAFE-GISEL-NEXT: s_add_i32 s2, s2, s6
	; VI-GISEL-NEXT: s_cmp_gt_i32 s4, 30			; VI-SAFE-GISEL-NEXT: s_cmp_gt_i32 s4, 30
	; VI-GISEL-NEXT: s_cselect_b32 s2, 0x7c00, s2			; VI-SAFE-GISEL-NEXT: s_cselect_b32 s2, 0x7c00, s2
	; VI-GISEL-NEXT: s_cmpk_eq_i32 s4, 0x40f			; VI-SAFE-GISEL-NEXT: s_cmpk_eq_i32 s4, 0x40f
	; VI-GISEL-NEXT: s_cselect_b32 s2, s5, s2			; VI-SAFE-GISEL-NEXT: s_cselect_b32 s2, s5, s2
	; VI-GISEL-NEXT: s_lshr_b32 s3, s3, 16			; VI-SAFE-GISEL-NEXT: s_lshr_b32 s3, s3, 16
	; VI-GISEL-NEXT: s_and_b32 s3, s3, 0x8000			; VI-SAFE-GISEL-NEXT: s_and_b32 s3, s3, 0x8000
	; VI-GISEL-NEXT: s_or_b32 s2, s3, s2			; VI-SAFE-GISEL-NEXT: s_or_b32 s2, s3, s2
	; VI-GISEL-NEXT: v_mov_b32_e32 v0, s2			; VI-SAFE-GISEL-NEXT: v_mov_b32_e32 v0, s2
	; VI-GISEL-NEXT: s_mov_b32 s2, -1			; VI-SAFE-GISEL-NEXT: s_mov_b32 s2, -1
	; VI-GISEL-NEXT: s_mov_b32 s3, 0xf000			; VI-SAFE-GISEL-NEXT: s_mov_b32 s3, 0xf000
	; VI-GISEL-NEXT: buffer_store_short v0, off, s[0:3], 0			; VI-SAFE-GISEL-NEXT: buffer_store_short v0, off, s[0:3], 0
	; VI-GISEL-NEXT: s_endpgm			; VI-SAFE-GISEL-NEXT: s_endpgm
	;			;
	; VI-UNSAFE-SDAG-LABEL: fptrunc_f64_to_f16:			; VI-UNSAFE-SDAG-LABEL: fptrunc_f64_to_f16:
	; VI-UNSAFE-SDAG: ; %bb.0:			; VI-UNSAFE-SDAG: ; %bb.0:
	; VI-UNSAFE-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24			; VI-UNSAFE-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
	; VI-UNSAFE-SDAG-NEXT: s_waitcnt lgkmcnt(0)			; VI-UNSAFE-SDAG-NEXT: s_waitcnt lgkmcnt(0)
	; VI-UNSAFE-SDAG-NEXT: v_cvt_f32_f64_e32 v0, s[2:3]			; VI-UNSAFE-SDAG-NEXT: v_cvt_f32_f64_e32 v0, s[2:3]
	; VI-UNSAFE-SDAG-NEXT: s_mov_b32 s3, 0xf000			; VI-UNSAFE-SDAG-NEXT: s_mov_b32 s3, 0xf000
	; VI-UNSAFE-SDAG-NEXT: s_mov_b32 s2, -1			; VI-UNSAFE-SDAG-NEXT: s_mov_b32 s2, -1
	; VI-UNSAFE-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0			; VI-UNSAFE-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
	; VI-UNSAFE-SDAG-NEXT: buffer_store_short v0, off, s[0:3], 0			; VI-UNSAFE-SDAG-NEXT: buffer_store_short v0, off, s[0:3], 0
	; VI-UNSAFE-SDAG-NEXT: s_endpgm			; VI-UNSAFE-SDAG-NEXT: s_endpgm
	;			;
				; VI-UNSAFE-GISEL-LABEL: fptrunc_f64_to_f16:
				; VI-UNSAFE-GISEL: ; %bb.0:
				; VI-UNSAFE-GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
				; VI-UNSAFE-GISEL-NEXT: s_waitcnt lgkmcnt(0)
				; VI-UNSAFE-GISEL-NEXT: v_cvt_f32_f64_e32 v0, s[2:3]
				; VI-UNSAFE-GISEL-NEXT: s_mov_b32 s2, -1
				; VI-UNSAFE-GISEL-NEXT: s_mov_b32 s3, 0xf000
				; VI-UNSAFE-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
				; VI-UNSAFE-GISEL-NEXT: buffer_store_short v0, off, s[0:3], 0
				; VI-UNSAFE-GISEL-NEXT: s_endpgm
				;
	; GFX10-SAFE-SDAG-LABEL: fptrunc_f64_to_f16:			; GFX10-SAFE-SDAG-LABEL: fptrunc_f64_to_f16:
	; GFX10-SAFE-SDAG: ; %bb.0:			; GFX10-SAFE-SDAG: ; %bb.0:
	; GFX10-SAFE-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24			; GFX10-SAFE-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
	; GFX10-SAFE-SDAG-NEXT: s_waitcnt lgkmcnt(0)			; GFX10-SAFE-SDAG-NEXT: s_waitcnt lgkmcnt(0)
	; GFX10-SAFE-SDAG-NEXT: s_and_b32 s4, s3, 0x1ff			; GFX10-SAFE-SDAG-NEXT: s_and_b32 s4, s3, 0x1ff
	; GFX10-SAFE-SDAG-NEXT: s_lshr_b32 s5, s3, 8			; GFX10-SAFE-SDAG-NEXT: s_lshr_b32 s5, s3, 8
	; GFX10-SAFE-SDAG-NEXT: s_or_b32 s2, s4, s2			; GFX10-SAFE-SDAG-NEXT: s_or_b32 s2, s4, s2
	; GFX10-SAFE-SDAG-NEXT: s_and_b32 s4, s5, 0xffe			; GFX10-SAFE-SDAG-NEXT: s_and_b32 s4, s5, 0xffe
	Show All 40 Lines
	; GFX10-SAFE-SDAG-NEXT: v_lshlrev_b32_e32 v0, 9, v0			; GFX10-SAFE-SDAG-NEXT: v_lshlrev_b32_e32 v0, 9, v0
	; GFX10-SAFE-SDAG-NEXT: v_or_b32_e32 v0, 0x7c00, v0			; GFX10-SAFE-SDAG-NEXT: v_or_b32_e32 v0, 0x7c00, v0
	; GFX10-SAFE-SDAG-NEXT: v_cndmask_b32_e32 v0, s5, v0, vcc_lo			; GFX10-SAFE-SDAG-NEXT: v_cndmask_b32_e32 v0, s5, v0, vcc_lo
	; GFX10-SAFE-SDAG-NEXT: v_or_b32_e32 v0, s2, v0			; GFX10-SAFE-SDAG-NEXT: v_or_b32_e32 v0, s2, v0
	; GFX10-SAFE-SDAG-NEXT: s_mov_b32 s2, -1			; GFX10-SAFE-SDAG-NEXT: s_mov_b32 s2, -1
	; GFX10-SAFE-SDAG-NEXT: buffer_store_short v0, off, s[0:3], 0			; GFX10-SAFE-SDAG-NEXT: buffer_store_short v0, off, s[0:3], 0
	; GFX10-SAFE-SDAG-NEXT: s_endpgm			; GFX10-SAFE-SDAG-NEXT: s_endpgm
	;			;
	; GFX10-GISEL-LABEL: fptrunc_f64_to_f16:			; GFX10-SAFE-GISEL-LABEL: fptrunc_f64_to_f16:
	; GFX10-GISEL: ; %bb.0:			; GFX10-SAFE-GISEL: ; %bb.0:
	; GFX10-GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24			; GFX10-SAFE-GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
	; GFX10-GISEL-NEXT: s_waitcnt lgkmcnt(0)			; GFX10-SAFE-GISEL-NEXT: s_waitcnt lgkmcnt(0)
	; GFX10-GISEL-NEXT: s_and_b32 s6, s3, 0x1ff			; GFX10-SAFE-GISEL-NEXT: s_and_b32 s6, s3, 0x1ff
	; GFX10-GISEL-NEXT: s_bfe_u32 s4, s3, 0xb0014			; GFX10-SAFE-GISEL-NEXT: s_bfe_u32 s4, s3, 0xb0014
	; GFX10-GISEL-NEXT: s_lshr_b32 s5, s3, 8			; GFX10-SAFE-GISEL-NEXT: s_lshr_b32 s5, s3, 8
	; GFX10-GISEL-NEXT: s_or_b32 s2, s6, s2			; GFX10-SAFE-GISEL-NEXT: s_or_b32 s2, s6, s2
	; GFX10-GISEL-NEXT: s_addk_i32 s4, 0xfc10			; GFX10-SAFE-GISEL-NEXT: s_addk_i32 s4, 0xfc10
	; GFX10-GISEL-NEXT: s_and_b32 s5, s5, 0xffe			; GFX10-SAFE-GISEL-NEXT: s_and_b32 s5, s5, 0xffe
	; GFX10-GISEL-NEXT: s_cmp_lg_u32 s2, 0			; GFX10-SAFE-GISEL-NEXT: s_cmp_lg_u32 s2, 0
	; GFX10-GISEL-NEXT: s_cselect_b32 s2, 1, 0			; GFX10-SAFE-GISEL-NEXT: s_cselect_b32 s2, 1, 0
	; GFX10-GISEL-NEXT: s_or_b32 s2, s5, s2			; GFX10-SAFE-GISEL-NEXT: s_or_b32 s2, s5, s2
	; GFX10-GISEL-NEXT: s_movk_i32 s5, 0x7e00			; GFX10-SAFE-GISEL-NEXT: s_movk_i32 s5, 0x7e00
	; GFX10-GISEL-NEXT: s_cmp_lg_u32 s2, 0			; GFX10-SAFE-GISEL-NEXT: s_cmp_lg_u32 s2, 0
	; GFX10-GISEL-NEXT: s_cselect_b32 s5, s5, 0x7c00			; GFX10-SAFE-GISEL-NEXT: s_cselect_b32 s5, s5, 0x7c00
	; GFX10-GISEL-NEXT: s_sub_i32 s6, 1, s4			; GFX10-SAFE-GISEL-NEXT: s_sub_i32 s6, 1, s4
	; GFX10-GISEL-NEXT: s_or_b32 s7, s2, 0x1000			; GFX10-SAFE-GISEL-NEXT: s_or_b32 s7, s2, 0x1000
	; GFX10-GISEL-NEXT: s_max_i32 s6, s6, 0			; GFX10-SAFE-GISEL-NEXT: s_max_i32 s6, s6, 0
	; GFX10-GISEL-NEXT: s_lshl_b32 s9, s4, 12			; GFX10-SAFE-GISEL-NEXT: s_lshl_b32 s9, s4, 12
	; GFX10-GISEL-NEXT: s_min_i32 s6, s6, 13			; GFX10-SAFE-GISEL-NEXT: s_min_i32 s6, s6, 13
	; GFX10-GISEL-NEXT: s_or_b32 s2, s2, s9			; GFX10-SAFE-GISEL-NEXT: s_or_b32 s2, s2, s9
	; GFX10-GISEL-NEXT: s_lshr_b32 s8, s7, s6			; GFX10-SAFE-GISEL-NEXT: s_lshr_b32 s8, s7, s6
	; GFX10-GISEL-NEXT: s_lshl_b32 s6, s8, s6			; GFX10-SAFE-GISEL-NEXT: s_lshl_b32 s6, s8, s6
	; GFX10-GISEL-NEXT: s_cmp_lg_u32 s6, s7			; GFX10-SAFE-GISEL-NEXT: s_cmp_lg_u32 s6, s7
	; GFX10-GISEL-NEXT: s_cselect_b32 s6, 1, 0			; GFX10-SAFE-GISEL-NEXT: s_cselect_b32 s6, 1, 0
	; GFX10-GISEL-NEXT: s_or_b32 s6, s8, s6			; GFX10-SAFE-GISEL-NEXT: s_or_b32 s6, s8, s6
	; GFX10-GISEL-NEXT: s_cmp_lt_i32 s4, 1			; GFX10-SAFE-GISEL-NEXT: s_cmp_lt_i32 s4, 1
	; GFX10-GISEL-NEXT: s_cselect_b32 s2, s6, s2			; GFX10-SAFE-GISEL-NEXT: s_cselect_b32 s2, s6, s2
	; GFX10-GISEL-NEXT: s_and_b32 s6, s2, 7			; GFX10-SAFE-GISEL-NEXT: s_and_b32 s6, s2, 7
	; GFX10-GISEL-NEXT: s_lshr_b32 s2, s2, 2			; GFX10-SAFE-GISEL-NEXT: s_lshr_b32 s2, s2, 2
	; GFX10-GISEL-NEXT: s_cmp_eq_u32 s6, 3			; GFX10-SAFE-GISEL-NEXT: s_cmp_eq_u32 s6, 3
	; GFX10-GISEL-NEXT: s_cselect_b32 s7, 1, 0			; GFX10-SAFE-GISEL-NEXT: s_cselect_b32 s7, 1, 0
	; GFX10-GISEL-NEXT: s_cmp_gt_i32 s6, 5			; GFX10-SAFE-GISEL-NEXT: s_cmp_gt_i32 s6, 5
	; GFX10-GISEL-NEXT: s_cselect_b32 s6, 1, 0			; GFX10-SAFE-GISEL-NEXT: s_cselect_b32 s6, 1, 0
	; GFX10-GISEL-NEXT: s_or_b32 s6, s7, s6			; GFX10-SAFE-GISEL-NEXT: s_or_b32 s6, s7, s6
	; GFX10-GISEL-NEXT: s_and_b32 s6, s6, 1			; GFX10-SAFE-GISEL-NEXT: s_and_b32 s6, s6, 1
	; GFX10-GISEL-NEXT: s_add_i32 s2, s2, s6			; GFX10-SAFE-GISEL-NEXT: s_add_i32 s2, s2, s6
	; GFX10-GISEL-NEXT: s_cmp_gt_i32 s4, 30			; GFX10-SAFE-GISEL-NEXT: s_cmp_gt_i32 s4, 30
	; GFX10-GISEL-NEXT: s_cselect_b32 s2, 0x7c00, s2			; GFX10-SAFE-GISEL-NEXT: s_cselect_b32 s2, 0x7c00, s2
	; GFX10-GISEL-NEXT: s_cmpk_eq_i32 s4, 0x40f			; GFX10-SAFE-GISEL-NEXT: s_cmpk_eq_i32 s4, 0x40f
	; GFX10-GISEL-NEXT: s_cselect_b32 s2, s5, s2			; GFX10-SAFE-GISEL-NEXT: s_cselect_b32 s2, s5, s2
	; GFX10-GISEL-NEXT: s_lshr_b32 s3, s3, 16			; GFX10-SAFE-GISEL-NEXT: s_lshr_b32 s3, s3, 16
	; GFX10-GISEL-NEXT: s_and_b32 s3, s3, 0x8000			; GFX10-SAFE-GISEL-NEXT: s_and_b32 s3, s3, 0x8000
	; GFX10-GISEL-NEXT: s_or_b32 s2, s3, s2			; GFX10-SAFE-GISEL-NEXT: s_or_b32 s2, s3, s2
	; GFX10-GISEL-NEXT: s_mov_b32 s3, 0x31016000			; GFX10-SAFE-GISEL-NEXT: s_mov_b32 s3, 0x31016000
	; GFX10-GISEL-NEXT: v_mov_b32_e32 v0, s2			; GFX10-SAFE-GISEL-NEXT: v_mov_b32_e32 v0, s2
	; GFX10-GISEL-NEXT: s_mov_b32 s2, -1			; GFX10-SAFE-GISEL-NEXT: s_mov_b32 s2, -1
	; GFX10-GISEL-NEXT: buffer_store_short v0, off, s[0:3], 0			; GFX10-SAFE-GISEL-NEXT: buffer_store_short v0, off, s[0:3], 0
	; GFX10-GISEL-NEXT: s_endpgm			; GFX10-SAFE-GISEL-NEXT: s_endpgm
	;			;
	; GFX10-UNSAFE-SDAG-LABEL: fptrunc_f64_to_f16:			; GFX10-UNSAFE-SDAG-LABEL: fptrunc_f64_to_f16:
	; GFX10-UNSAFE-SDAG: ; %bb.0:			; GFX10-UNSAFE-SDAG: ; %bb.0:
	; GFX10-UNSAFE-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24			; GFX10-UNSAFE-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
	; GFX10-UNSAFE-SDAG-NEXT: s_waitcnt lgkmcnt(0)			; GFX10-UNSAFE-SDAG-NEXT: s_waitcnt lgkmcnt(0)
	; GFX10-UNSAFE-SDAG-NEXT: v_cvt_f32_f64_e32 v0, s[2:3]			; GFX10-UNSAFE-SDAG-NEXT: v_cvt_f32_f64_e32 v0, s[2:3]
	; GFX10-UNSAFE-SDAG-NEXT: s_mov_b32 s3, 0x31016000			; GFX10-UNSAFE-SDAG-NEXT: s_mov_b32 s3, 0x31016000
	; GFX10-UNSAFE-SDAG-NEXT: s_mov_b32 s2, -1			; GFX10-UNSAFE-SDAG-NEXT: s_mov_b32 s2, -1
	; GFX10-UNSAFE-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0			; GFX10-UNSAFE-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
	; GFX10-UNSAFE-SDAG-NEXT: buffer_store_short v0, off, s[0:3], 0			; GFX10-UNSAFE-SDAG-NEXT: buffer_store_short v0, off, s[0:3], 0
	; GFX10-UNSAFE-SDAG-NEXT: s_endpgm			; GFX10-UNSAFE-SDAG-NEXT: s_endpgm
	;			;
				; GFX10-UNSAFE-GISEL-LABEL: fptrunc_f64_to_f16:
				; GFX10-UNSAFE-GISEL: ; %bb.0:
				; GFX10-UNSAFE-GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
				; GFX10-UNSAFE-GISEL-NEXT: s_waitcnt lgkmcnt(0)
				; GFX10-UNSAFE-GISEL-NEXT: v_cvt_f32_f64_e32 v0, s[2:3]
				; GFX10-UNSAFE-GISEL-NEXT: s_mov_b32 s2, -1
				; GFX10-UNSAFE-GISEL-NEXT: s_mov_b32 s3, 0x31016000
				; GFX10-UNSAFE-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
				; GFX10-UNSAFE-GISEL-NEXT: buffer_store_short v0, off, s[0:3], 0
				; GFX10-UNSAFE-GISEL-NEXT: s_endpgm
				;
	; GFX11-SAFE-SDAG-LABEL: fptrunc_f64_to_f16:			; GFX11-SAFE-SDAG-LABEL: fptrunc_f64_to_f16:
	; GFX11-SAFE-SDAG: ; %bb.0:			; GFX11-SAFE-SDAG: ; %bb.0:
	; GFX11-SAFE-SDAG-NEXT: s_load_b128 s[0:3], s[0:1], 0x24			; GFX11-SAFE-SDAG-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
	; GFX11-SAFE-SDAG-NEXT: s_waitcnt lgkmcnt(0)			; GFX11-SAFE-SDAG-NEXT: s_waitcnt lgkmcnt(0)
	; GFX11-SAFE-SDAG-NEXT: s_and_b32 s4, s3, 0x1ff			; GFX11-SAFE-SDAG-NEXT: s_and_b32 s4, s3, 0x1ff
	; GFX11-SAFE-SDAG-NEXT: s_lshr_b32 s5, s3, 8			; GFX11-SAFE-SDAG-NEXT: s_lshr_b32 s5, s3, 8
	; GFX11-SAFE-SDAG-NEXT: s_or_b32 s2, s4, s2			; GFX11-SAFE-SDAG-NEXT: s_or_b32 s2, s4, s2
	; GFX11-SAFE-SDAG-NEXT: s_and_b32 s4, s5, 0xffe			; GFX11-SAFE-SDAG-NEXT: s_and_b32 s4, s5, 0xffe
	▲ Show 20 Lines • Show All 50 Lines • ▼ Show 20 Lines
	; GFX11-SAFE-SDAG-NEXT: v_cndmask_b32_e32 v0, s5, v0, vcc_lo			; GFX11-SAFE-SDAG-NEXT: v_cndmask_b32_e32 v0, s5, v0, vcc_lo
	; GFX11-SAFE-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)			; GFX11-SAFE-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
	; GFX11-SAFE-SDAG-NEXT: v_or_b32_e32 v0, s2, v0			; GFX11-SAFE-SDAG-NEXT: v_or_b32_e32 v0, s2, v0
	; GFX11-SAFE-SDAG-NEXT: s_mov_b32 s2, -1			; GFX11-SAFE-SDAG-NEXT: s_mov_b32 s2, -1
	; GFX11-SAFE-SDAG-NEXT: buffer_store_b16 v0, off, s[0:3], 0			; GFX11-SAFE-SDAG-NEXT: buffer_store_b16 v0, off, s[0:3], 0
	; GFX11-SAFE-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)			; GFX11-SAFE-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
	; GFX11-SAFE-SDAG-NEXT: s_endpgm			; GFX11-SAFE-SDAG-NEXT: s_endpgm
	;			;
	; GFX11-GISEL-LABEL: fptrunc_f64_to_f16:			; GFX11-SAFE-GISEL-LABEL: fptrunc_f64_to_f16:
	; GFX11-GISEL: ; %bb.0:			; GFX11-SAFE-GISEL: ; %bb.0:
	; GFX11-GISEL-NEXT: s_load_b128 s[0:3], s[0:1], 0x24			; GFX11-SAFE-GISEL-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
	; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)			; GFX11-SAFE-GISEL-NEXT: s_waitcnt lgkmcnt(0)
	; GFX11-GISEL-NEXT: s_and_b32 s6, s3, 0x1ff			; GFX11-SAFE-GISEL-NEXT: s_and_b32 s6, s3, 0x1ff
	; GFX11-GISEL-NEXT: s_bfe_u32 s4, s3, 0xb0014			; GFX11-SAFE-GISEL-NEXT: s_bfe_u32 s4, s3, 0xb0014
	; GFX11-GISEL-NEXT: s_lshr_b32 s5, s3, 8			; GFX11-SAFE-GISEL-NEXT: s_lshr_b32 s5, s3, 8
	; GFX11-GISEL-NEXT: s_or_b32 s2, s6, s2			; GFX11-SAFE-GISEL-NEXT: s_or_b32 s2, s6, s2
	; GFX11-GISEL-NEXT: s_addk_i32 s4, 0xfc10			; GFX11-SAFE-GISEL-NEXT: s_addk_i32 s4, 0xfc10
	; GFX11-GISEL-NEXT: s_and_b32 s5, s5, 0xffe			; GFX11-SAFE-GISEL-NEXT: s_and_b32 s5, s5, 0xffe
	; GFX11-GISEL-NEXT: s_cmp_lg_u32 s2, 0			; GFX11-SAFE-GISEL-NEXT: s_cmp_lg_u32 s2, 0
	; GFX11-GISEL-NEXT: s_cselect_b32 s2, 1, 0			; GFX11-SAFE-GISEL-NEXT: s_cselect_b32 s2, 1, 0
	; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)			; GFX11-SAFE-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
	; GFX11-GISEL-NEXT: s_or_b32 s2, s5, s2			; GFX11-SAFE-GISEL-NEXT: s_or_b32 s2, s5, s2
	; GFX11-GISEL-NEXT: s_movk_i32 s5, 0x7e00			; GFX11-SAFE-GISEL-NEXT: s_movk_i32 s5, 0x7e00
	; GFX11-GISEL-NEXT: s_cmp_lg_u32 s2, 0			; GFX11-SAFE-GISEL-NEXT: s_cmp_lg_u32 s2, 0
	; GFX11-GISEL-NEXT: s_cselect_b32 s5, s5, 0x7c00			; GFX11-SAFE-GISEL-NEXT: s_cselect_b32 s5, s5, 0x7c00
	; GFX11-GISEL-NEXT: s_sub_i32 s6, 1, s4			; GFX11-SAFE-GISEL-NEXT: s_sub_i32 s6, 1, s4
	; GFX11-GISEL-NEXT: s_or_b32 s7, s2, 0x1000			; GFX11-SAFE-GISEL-NEXT: s_or_b32 s7, s2, 0x1000
	; GFX11-GISEL-NEXT: s_max_i32 s6, s6, 0			; GFX11-SAFE-GISEL-NEXT: s_max_i32 s6, s6, 0
	; GFX11-GISEL-NEXT: s_lshl_b32 s9, s4, 12			; GFX11-SAFE-GISEL-NEXT: s_lshl_b32 s9, s4, 12
	; GFX11-GISEL-NEXT: s_min_i32 s6, s6, 13			; GFX11-SAFE-GISEL-NEXT: s_min_i32 s6, s6, 13
	; GFX11-GISEL-NEXT: s_or_b32 s2, s2, s9			; GFX11-SAFE-GISEL-NEXT: s_or_b32 s2, s2, s9
	; GFX11-GISEL-NEXT: s_lshr_b32 s8, s7, s6			; GFX11-SAFE-GISEL-NEXT: s_lshr_b32 s8, s7, s6
	; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) \| instskip(NEXT) \| instid1(SALU_CYCLE_1)			; GFX11-SAFE-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) \| instskip(NEXT) \| instid1(SALU_CYCLE_1)
	; GFX11-GISEL-NEXT: s_lshl_b32 s6, s8, s6			; GFX11-SAFE-GISEL-NEXT: s_lshl_b32 s6, s8, s6
	; GFX11-GISEL-NEXT: s_cmp_lg_u32 s6, s7			; GFX11-SAFE-GISEL-NEXT: s_cmp_lg_u32 s6, s7
	; GFX11-GISEL-NEXT: s_cselect_b32 s6, 1, 0			; GFX11-SAFE-GISEL-NEXT: s_cselect_b32 s6, 1, 0
	; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) \| instskip(SKIP_2) \| instid1(SALU_CYCLE_1)			; GFX11-SAFE-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) \| instskip(SKIP_2) \| instid1(SALU_CYCLE_1)
	; GFX11-GISEL-NEXT: s_or_b32 s6, s8, s6			; GFX11-SAFE-GISEL-NEXT: s_or_b32 s6, s8, s6
	; GFX11-GISEL-NEXT: s_cmp_lt_i32 s4, 1			; GFX11-SAFE-GISEL-NEXT: s_cmp_lt_i32 s4, 1
	; GFX11-GISEL-NEXT: s_cselect_b32 s2, s6, s2			; GFX11-SAFE-GISEL-NEXT: s_cselect_b32 s2, s6, s2
	; GFX11-GISEL-NEXT: s_and_b32 s6, s2, 7			; GFX11-SAFE-GISEL-NEXT: s_and_b32 s6, s2, 7
	; GFX11-GISEL-NEXT: s_lshr_b32 s2, s2, 2			; GFX11-SAFE-GISEL-NEXT: s_lshr_b32 s2, s2, 2
	; GFX11-GISEL-NEXT: s_cmp_eq_u32 s6, 3			; GFX11-SAFE-GISEL-NEXT: s_cmp_eq_u32 s6, 3
	; GFX11-GISEL-NEXT: s_cselect_b32 s7, 1, 0			; GFX11-SAFE-GISEL-NEXT: s_cselect_b32 s7, 1, 0
	; GFX11-GISEL-NEXT: s_cmp_gt_i32 s6, 5			; GFX11-SAFE-GISEL-NEXT: s_cmp_gt_i32 s6, 5
	; GFX11-GISEL-NEXT: s_cselect_b32 s6, 1, 0			; GFX11-SAFE-GISEL-NEXT: s_cselect_b32 s6, 1, 0
	; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) \| instskip(NEXT) \| instid1(SALU_CYCLE_1)			; GFX11-SAFE-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) \| instskip(NEXT) \| instid1(SALU_CYCLE_1)
	; GFX11-GISEL-NEXT: s_or_b32 s6, s7, s6			; GFX11-SAFE-GISEL-NEXT: s_or_b32 s6, s7, s6
	; GFX11-GISEL-NEXT: s_and_b32 s6, s6, 1			; GFX11-SAFE-GISEL-NEXT: s_and_b32 s6, s6, 1
	; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)			; GFX11-SAFE-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
	; GFX11-GISEL-NEXT: s_add_i32 s2, s2, s6			; GFX11-SAFE-GISEL-NEXT: s_add_i32 s2, s2, s6
	; GFX11-GISEL-NEXT: s_cmp_gt_i32 s4, 30			; GFX11-SAFE-GISEL-NEXT: s_cmp_gt_i32 s4, 30
	; GFX11-GISEL-NEXT: s_cselect_b32 s2, 0x7c00, s2			; GFX11-SAFE-GISEL-NEXT: s_cselect_b32 s2, 0x7c00, s2
	; GFX11-GISEL-NEXT: s_cmpk_eq_i32 s4, 0x40f			; GFX11-SAFE-GISEL-NEXT: s_cmpk_eq_i32 s4, 0x40f
	; GFX11-GISEL-NEXT: s_cselect_b32 s2, s5, s2			; GFX11-SAFE-GISEL-NEXT: s_cselect_b32 s2, s5, s2
	; GFX11-GISEL-NEXT: s_lshr_b32 s3, s3, 16			; GFX11-SAFE-GISEL-NEXT: s_lshr_b32 s3, s3, 16
	; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) \| instskip(NEXT) \| instid1(SALU_CYCLE_1)			; GFX11-SAFE-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) \| instskip(NEXT) \| instid1(SALU_CYCLE_1)
	; GFX11-GISEL-NEXT: s_and_b32 s3, s3, 0x8000			; GFX11-SAFE-GISEL-NEXT: s_and_b32 s3, s3, 0x8000
	; GFX11-GISEL-NEXT: s_or_b32 s2, s3, s2			; GFX11-SAFE-GISEL-NEXT: s_or_b32 s2, s3, s2
	; GFX11-GISEL-NEXT: s_mov_b32 s3, 0x31016000			; GFX11-SAFE-GISEL-NEXT: s_mov_b32 s3, 0x31016000
	; GFX11-GISEL-NEXT: v_mov_b32_e32 v0, s2			; GFX11-SAFE-GISEL-NEXT: v_mov_b32_e32 v0, s2
	; GFX11-GISEL-NEXT: s_mov_b32 s2, -1			; GFX11-SAFE-GISEL-NEXT: s_mov_b32 s2, -1
	; GFX11-GISEL-NEXT: buffer_store_b16 v0, off, s[0:3], 0			; GFX11-SAFE-GISEL-NEXT: buffer_store_b16 v0, off, s[0:3], 0
	; GFX11-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)			; GFX11-SAFE-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
	; GFX11-GISEL-NEXT: s_endpgm			; GFX11-SAFE-GISEL-NEXT: s_endpgm
	;			;
	; GFX11-UNSAFE-SDAG-LABEL: fptrunc_f64_to_f16:			; GFX11-UNSAFE-SDAG-LABEL: fptrunc_f64_to_f16:
	; GFX11-UNSAFE-SDAG: ; %bb.0:			; GFX11-UNSAFE-SDAG: ; %bb.0:
	; GFX11-UNSAFE-SDAG-NEXT: s_load_b128 s[0:3], s[0:1], 0x24			; GFX11-UNSAFE-SDAG-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
	; GFX11-UNSAFE-SDAG-NEXT: s_waitcnt lgkmcnt(0)			; GFX11-UNSAFE-SDAG-NEXT: s_waitcnt lgkmcnt(0)
	; GFX11-UNSAFE-SDAG-NEXT: v_cvt_f32_f64_e32 v0, s[2:3]			; GFX11-UNSAFE-SDAG-NEXT: v_cvt_f32_f64_e32 v0, s[2:3]
	; GFX11-UNSAFE-SDAG-NEXT: s_mov_b32 s3, 0x31016000			; GFX11-UNSAFE-SDAG-NEXT: s_mov_b32 s3, 0x31016000
	; GFX11-UNSAFE-SDAG-NEXT: s_mov_b32 s2, -1			; GFX11-UNSAFE-SDAG-NEXT: s_mov_b32 s2, -1
	; GFX11-UNSAFE-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)			; GFX11-UNSAFE-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
	; GFX11-UNSAFE-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0			; GFX11-UNSAFE-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
	; GFX11-UNSAFE-SDAG-NEXT: buffer_store_b16 v0, off, s[0:3], 0			; GFX11-UNSAFE-SDAG-NEXT: buffer_store_b16 v0, off, s[0:3], 0
	; GFX11-UNSAFE-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)			; GFX11-UNSAFE-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
	; GFX11-UNSAFE-SDAG-NEXT: s_endpgm			; GFX11-UNSAFE-SDAG-NEXT: s_endpgm
				;
				; GFX11-UNSAFE-GISEL-LABEL: fptrunc_f64_to_f16:
				; GFX11-UNSAFE-GISEL: ; %bb.0:
				; GFX11-UNSAFE-GISEL-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
				; GFX11-UNSAFE-GISEL-NEXT: s_waitcnt lgkmcnt(0)
				; GFX11-UNSAFE-GISEL-NEXT: v_cvt_f32_f64_e32 v0, s[2:3]
				; GFX11-UNSAFE-GISEL-NEXT: s_mov_b32 s2, -1
				; GFX11-UNSAFE-GISEL-NEXT: s_mov_b32 s3, 0x31016000
				; GFX11-UNSAFE-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
				; GFX11-UNSAFE-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
				; GFX11-UNSAFE-GISEL-NEXT: buffer_store_b16 v0, off, s[0:3], 0
				; GFX11-UNSAFE-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
				; GFX11-UNSAFE-GISEL-NEXT: s_endpgm
	%result = fptrunc double %in to half			%result = fptrunc double %in to half
	%result_i16 = bitcast half %result to i16			%result_i16 = bitcast half %result to i16
	store i16 %result_i16, ptr addrspace(1) %out			store i16 %result_i16, ptr addrspace(1) %out
	ret void			ret void
	}			}

	define amdgpu_kernel void @fptrunc_v2f64_to_v2f32(ptr addrspace(1) %out, <2 x double> %in) {			define amdgpu_kernel void @fptrunc_v2f64_to_v2f32(ptr addrspace(1) %out, <2 x double> %in) {
	; SI-LABEL: fptrunc_v2f64_to_v2f32:			; SI-LABEL: fptrunc_v2f64_to_v2f32:
	▲ Show 20 Lines • Show All 454 Lines • Show Last 20 Lines

This is an archive of the discontinued LLVM Phabricator instance.

[Codegen] Generate fast fp64-to-fp16 conversions in unsafe mode.
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 539154

llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

llvm/test/CodeGen/AMDGPU/fptrunc.f16.ll

llvm/test/CodeGen/AMDGPU/fptrunc.ll

This is an archive of the discontinued LLVM Phabricator instance.

[Codegen] Generate fast fp64-to-fp16 conversions in unsafe mode.ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 539154

llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

llvm/test/CodeGen/AMDGPU/fptrunc.f16.ll

llvm/test/CodeGen/AMDGPU/fptrunc.ll

[Codegen] Generate fast fp64-to-fp16 conversions in unsafe mode.
ClosedPublic