Diff 537417

llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h

Show All 24 Lines
class MachineIRBuilder;		class MachineIRBuilder;

namespace AMDGPU {		namespace AMDGPU {
struct ImageDimIntrinsicInfo;		struct ImageDimIntrinsicInfo;
}		}
/// This class provides the information for the target register banks.		/// This class provides the information for the target register banks.
class AMDGPULegalizerInfo final : public LegalizerInfo {		class AMDGPULegalizerInfo final : public LegalizerInfo {
const GCNSubtarget &ST;		const GCNSubtarget &ST;
		const GCNTargetMachine &TM;

public:		public:
AMDGPULegalizerInfo(const GCNSubtarget &ST,		AMDGPULegalizerInfo(const GCNSubtarget &ST,
const GCNTargetMachine &TM);		const GCNTargetMachine &TM);

bool legalizeCustom(LegalizerHelper &Helper, MachineInstr &MI) const override;		bool legalizeCustom(LegalizerHelper &Helper, MachineInstr &MI) const override;

Register getSegmentAperture(unsigned AddrSpace,		Register getSegmentAperture(unsigned AddrSpace,
Show All 9 Lines	public:
bool legalizeFrem(MachineInstr &MI, MachineRegisterInfo &MRI,		bool legalizeFrem(MachineInstr &MI, MachineRegisterInfo &MRI,
MachineIRBuilder &B) const;		MachineIRBuilder &B) const;
bool legalizeIntrinsicTrunc(MachineInstr &MI, MachineRegisterInfo &MRI,		bool legalizeIntrinsicTrunc(MachineInstr &MI, MachineRegisterInfo &MRI,
MachineIRBuilder &B) const;		MachineIRBuilder &B) const;
bool legalizeITOFP(MachineInstr &MI, MachineRegisterInfo &MRI,		bool legalizeITOFP(MachineInstr &MI, MachineRegisterInfo &MRI,
MachineIRBuilder &B, bool Signed) const;		MachineIRBuilder &B, bool Signed) const;
bool legalizeFPTOI(MachineInstr &MI, MachineRegisterInfo &MRI,		bool legalizeFPTOI(MachineInstr &MI, MachineRegisterInfo &MRI,
MachineIRBuilder &B, bool Signed) const;		MachineIRBuilder &B, bool Signed) const;
		bool legalizeFPTRUNC(LegalizerHelper &Helper, MachineInstr &MI) const;
bool legalizeMinNumMaxNum(LegalizerHelper &Helper, MachineInstr &MI) const;		bool legalizeMinNumMaxNum(LegalizerHelper &Helper, MachineInstr &MI) const;
bool legalizeExtractVectorElt(MachineInstr &MI, MachineRegisterInfo &MRI,		bool legalizeExtractVectorElt(MachineInstr &MI, MachineRegisterInfo &MRI,
MachineIRBuilder &B) const;		MachineIRBuilder &B) const;
bool legalizeInsertVectorElt(MachineInstr &MI, MachineRegisterInfo &MRI,		bool legalizeInsertVectorElt(MachineInstr &MI, MachineRegisterInfo &MRI,
MachineIRBuilder &B) const;		MachineIRBuilder &B) const;

bool legalizeSinCos(MachineInstr &MI, MachineRegisterInfo &MRI,		bool legalizeSinCos(MachineInstr &MI, MachineRegisterInfo &MRI,
MachineIRBuilder &B) const;		MachineIRBuilder &B) const;
▲ Show 20 Lines • Show All 151 Lines • Show Last 20 Lines

llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp

Show First 20 Lines • Show All 560 Lines • ▼ Show 20 Lines	static void castBufferRsrcArgToV4I32(MachineInstr &MI, MachineIRBuilder &B,
// Paranoidly prevent us from doing this multiple times.		// Paranoidly prevent us from doing this multiple times.
if (!hasBufferRsrcWorkaround(PointerTy))		if (!hasBufferRsrcWorkaround(PointerTy))
return;		return;
MO.setReg(castBufferRsrcToV4I32(MO.getReg(), B));		MO.setReg(castBufferRsrcToV4I32(MO.getReg(), B));
}		}

AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,		AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
const GCNTargetMachine &TM)		const GCNTargetMachine &TM)
: ST(ST_) {		: ST(ST_), TM(TM) {
using namespace TargetOpcode;		using namespace TargetOpcode;

auto GetAddrSpacePtr = [&TM](unsigned AS) {		auto GetAddrSpacePtr = [&TM](unsigned AS) {
return LLT::pointer(AS, TM.getPointerSizeInBits(AS));		return LLT::pointer(AS, TM.getPointerSizeInBits(AS));
};		};

const LLT S1 = LLT::scalar(1);		const LLT S1 = LLT::scalar(1);
const LLT S8 = LLT::scalar(8);		const LLT S8 = LLT::scalar(8);
▲ Show 20 Lines • Show All 375 Lines • ▼ Show 20 Lines	getActionDefinitionsBuilder(G_FFREXP)
.customFor({{S32, S32}, {S64, S32}})		.customFor({{S32, S32}, {S64, S32}})
.scalarize(0)		.scalarize(0)
.minScalar(0, S32)		.minScalar(0, S32)
.clampScalar(1, S32, S32)		.clampScalar(1, S32, S32)
.lower();		.lower();
}		}

getActionDefinitionsBuilder(G_FPTRUNC)		getActionDefinitionsBuilder(G_FPTRUNC)
.legalFor({{S32, S64}, {S16, S32}})		.legalFor({{S32, S64}, {S16, S32}})
		.customFor({S16, S64})
.scalarize(0)		.scalarize(0)
.lower();		.lower();

getActionDefinitionsBuilder(G_FPEXT)		getActionDefinitionsBuilder(G_FPEXT)
.legalFor({{S64, S32}, {S32, S16}})		.legalFor({{S64, S32}, {S32, S16}})
.narrowScalarFor({{S64, S16}}, changeTo(0, S32))		.narrowScalarFor({{S64, S16}}, changeTo(0, S32))
.scalarize(0);		.scalarize(0);

auto &FSubActions = getActionDefinitionsBuilder({G_FSUB, G_STRICT_FSUB});		auto &FSubActions = getActionDefinitionsBuilder({G_FSUB, G_STRICT_FSUB});
if (ST.has16BitInsts()) {		if (ST.has16BitInsts()) {
▲ Show 20 Lines • Show All 996 Lines • ▼ Show 20 Lines	bool AMDGPULegalizerInfo::legalizeCustom(LegalizerHelper &Helper,
case TargetOpcode::G_SITOFP:		case TargetOpcode::G_SITOFP:
return legalizeITOFP(MI, MRI, B, true);		return legalizeITOFP(MI, MRI, B, true);
case TargetOpcode::G_UITOFP:		case TargetOpcode::G_UITOFP:
return legalizeITOFP(MI, MRI, B, false);		return legalizeITOFP(MI, MRI, B, false);
case TargetOpcode::G_FPTOSI:		case TargetOpcode::G_FPTOSI:
return legalizeFPTOI(MI, MRI, B, true);		return legalizeFPTOI(MI, MRI, B, true);
case TargetOpcode::G_FPTOUI:		case TargetOpcode::G_FPTOUI:
return legalizeFPTOI(MI, MRI, B, false);		return legalizeFPTOI(MI, MRI, B, false);
		case TargetOpcode::G_FPTRUNC:
		return legalizeFPTRUNC(Helper, MI);
case TargetOpcode::G_FMINNUM:		case TargetOpcode::G_FMINNUM:
case TargetOpcode::G_FMAXNUM:		case TargetOpcode::G_FMAXNUM:
case TargetOpcode::G_FMINNUM_IEEE:		case TargetOpcode::G_FMINNUM_IEEE:
case TargetOpcode::G_FMAXNUM_IEEE:		case TargetOpcode::G_FMAXNUM_IEEE:
return legalizeMinNumMaxNum(Helper, MI);		return legalizeMinNumMaxNum(Helper, MI);
case TargetOpcode::G_EXTRACT_VECTOR_ELT:		case TargetOpcode::G_EXTRACT_VECTOR_ELT:
return legalizeExtractVectorElt(MI, MRI, B);		return legalizeExtractVectorElt(MI, MRI, B);
case TargetOpcode::G_INSERT_VECTOR_ELT:		case TargetOpcode::G_INSERT_VECTOR_ELT:
▲ Show 20 Lines • Show All 531 Lines • ▼ Show 20 Lines	B.buildSub(Dst, B.buildXor(S64, B.buildMergeLikeInstr(S64, {Lo, Hi}), Sign),
Sign);		Sign);
} else		} else
B.buildMergeLikeInstr(Dst, {Lo, Hi});		B.buildMergeLikeInstr(Dst, {Lo, Hi});
MI.eraseFromParent();		MI.eraseFromParent();

return true;		return true;
}		}

		/// Custom legalization hook for G_FPTRUNC, dispatched from legalizeCustom().
		///
		/// The instruction's destination scalar type is asserted to be 16 bits wide
		/// and its source scalar type 64 bits wide.
		///
		/// \param Helper legalizer helper used to perform the generic lowering.
		/// \param MI the G_FPTRUNC instruction being legalized.
		/// \returns true when \p MI is left untouched because unsafe FP math is
		/// enabled, or when the generic lowering reports LegalizerHelper::Legalized;
		/// false otherwise.
		bool AMDGPULegalizerInfo::legalizeFPTRUNC(LegalizerHelper &Helper,
		MachineInstr &MI) const {
		auto [ResultTy, OperandTy] = MI.getFirst2LLTs();
		assert(ResultTy.getScalarType() == LLT::scalar(16) &&
		OperandTy.getScalarType() == LLT::scalar(64));

		// With unsafe FP math the instruction is reported as handled without being
		// rewritten. NOTE(review): presumably instruction selection then matches it
		// directly -- confirm against the selection patterns.
		const bool KeepUnchanged = TM.Options.UnsafeFPMath;

		// Otherwise defer to the generic lowering; the short-circuit ensures it is
		// only attempted when the instruction was not kept as-is.
		return KeepUnchanged ||
		Helper.lowerFPTRUNC(MI) == LegalizerHelper::Legalized;
		}

bool AMDGPULegalizerInfo::legalizeMinNumMaxNum(LegalizerHelper &Helper,		bool AMDGPULegalizerInfo::legalizeMinNumMaxNum(LegalizerHelper &Helper,
MachineInstr &MI) const {		MachineInstr &MI) const {
MachineFunction &MF = Helper.MIRBuilder.getMF();		MachineFunction &MF = Helper.MIRBuilder.getMF();
const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();		const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();

		arsenmUnsubmitted Not Done Reply Inline Actions Can move this into the generic code. Also it's unfortunate that we don't have fmf on the cast instructions arsenm: Can move this into the generic code. Also it's unfortunate that we don't have fmf on the cast…
const bool IsIEEEOp = MI.getOpcode() == AMDGPU::G_FMINNUM_IEEE \|\|		const bool IsIEEEOp = MI.getOpcode() == AMDGPU::G_FMINNUM_IEEE \|\|
MI.getOpcode() == AMDGPU::G_FMAXNUM_IEEE;		MI.getOpcode() == AMDGPU::G_FMAXNUM_IEEE;

// With ieee_mode disabled, the instructions have the correct behavior		// With ieee_mode disabled, the instructions have the correct behavior
// already for G_FMINNUM/G_FMAXNUM		// already for G_FMINNUM/G_FMAXNUM
if (!MFI->getMode().IEEE)		if (!MFI->getMode().IEEE)
return !IsIEEEOp;		return !IsIEEEOp;

▲ Show 20 Lines • Show All 3,838 Lines • Show Last 20 Lines

llvm/lib/Target/AMDGPU/VOP1Instructions.td

	Show First 20 Lines • Show All 510 Lines • ▼ Show 20 Lines
	def : GCNPat<			def : GCNPat<
	(f32 (f16_to_fp i16:$src)),			(f32 (f16_to_fp i16:$src)),
	(V_CVT_F32_F16_e32 $src)			(V_CVT_F32_F16_e32 $src)
	>;			>;
	def : GCNPat<			def : GCNPat<
	(i16 (AMDGPUfp_to_f16 f32:$src)),			(i16 (AMDGPUfp_to_f16 f32:$src)),
	(V_CVT_F16_F32_e32 $src)			(V_CVT_F16_F32_e32 $src)
	>;			>;
				// Selects an f64 -> f16 fpround as two chained conversions:
				// V_CVT_F32_F64_e64 produces the intermediate value that feeds
				// V_CVT_F16_F32_e64. SRCMODS.NONE is passed to both instructions, so
				// no source modifiers are folded by this pattern.
				// NOTE(review): going through f32 rounds twice; presumably this is only
				// meant to be reached when unsafe FP math is enabled -- confirm.
				def : GCNPat<
				(f16 (fpround f64:$src)),
				(V_CVT_F16_F32_e64 SRCMODS.NONE, (V_CVT_F32_F64_e64 SRCMODS.NONE, $src))
				>;
				arsenmUnsubmitted Not Done Reply Inline Actions Should just let the expansion split them up, this is missing the source modifiers arsenm: Should just let the expansion split them up, this is missing the source modifiers
				kosarevAuthorUnsubmitted Done Reply Inline Actions Updated to match modifiers. The expansion would combine back to a single fpround f64, so doesn't work. kosarev: Updated to match modifiers. The expansion would combine back to a single fpround f64, so…
				foadUnsubmitted Not Done Reply Inline Actions "The expansion would combine back to a single fpround f64" - that sounds like the combine is broken then? It should respect legality. foad: "The expansion would combine back to a single fpround f64" - that sounds like the combine is…
				kosarevAuthorUnsubmitted Done Reply Inline Actions It's trivial to explain it to GISel that f16 = fp_round f64 should be expanded while f16 = fp_round f32 is legal as it is. However, in SDAG, if I'm not wrong, we can only differentiate by the resulting type, and if we choose custom expansion for all to-f16 roundings, then SDAG will try to combine whatever they result in. It's not an immediate problem for this patch because we always do FP_TO_FP16, because we don't have 16-bit registers here yet. For True16, however, if we want it to be a chain of two FP_ROUNDs, the expansion will be combined back. But then if we need the pattern for the True16 case anyway, then I'm not sure doing special work in GlobalISel and for the non-16bit cases is worth it. Would appreciate your opinions. kosarev: It's trivial to explain it to GISel that f16 = fp_round f64 should be expanded while f16 =…
				foadUnsubmitted Not Done Reply Inline Actions SITargetLowering::lowerFP_ROUND is already doing custom legalization for to-f16 fprounds. It expands f64-to-f16 into two steps, and leaves the others alone. I would hope that post-legalization combiners would not create new to-f16 fprounds, because generally after legalization you should not create any new nodes unless they are "Legal" - and this does not include "Custom". foad: SITargetLowering::lowerFP_ROUND is already doing custom legalization for to-f16 fprounds. It…
				kosarevAuthorUnsubmitted Done Reply Inline Actions Well, the two FP_ROUNDs we might be expanding into here are legal. But I'm not sure already-legal expansions should mean no combining attempts. Otherwise, why would we want any combines after legalization? kosarev: Well, the two FP_ROUNDs we might be expanding into here //are// legal. But I'm not sure already…
				arsenmUnsubmitted Not Done Reply Inline Actions This is one of the consequences of the DAG's single-type legality checks. You need to consider both the source and dest types for legality arsenm: This is one of the consequences of the DAG's single-type legality checks. You need to consider…
				kosarevAuthorUnsubmitted Done Reply Inline Actions I probably misread Jay's comment. Not combining anything post-legalization if this results in a node for which getOperationAction() yields 'Custom' makes sense, yes. kosarev: I probably misread Jay's comment. Not combining anything post-legalization if this results in…
				foadUnsubmitted Not Done Reply Inline Actions "Not combining anything post-legalization if this results into a node for which getOperationAction() yields 'Custom' makes sense" - right, that is my understanding of the desired behaviour. But I can't promise that all existing combines obey it. foad: "Not combining anything post-legalization if this results into a node for which…
				kosarevAuthorUnsubmitted Done Reply Inline Actions This bit in DAGCombiner.cpp reads like the expectation was that combining legal SDAGs is not required to produce legal nodes: // If this combine is running after legalizing the DAG, re-legalize any // nodes pulled off the worklist. if (LegalDAG) { SmallSetVector<SDNode *, 16> UpdatedNodes; bool NIsValid = DAG.LegalizeOp(N, UpdatedNodes); for (SDNode *LN : UpdatedNodes) AddToWorklistWithUsers(LN); ... But then we also have this snippet that clearly addresses the same kind of problem, so not a precedent. // Legalizing in AArch64TargetLowering::LowerCONCAT_VECTORS() and combining // here could cause an infinite loop. That legalizing happens when LegalDAG // is true and input of AArch64TargetLowering::LowerCONCAT_VECTORS() is // scalable. if (In.getOpcode() == ISD::CONCAT_VECTORS && In.hasOneUse() && !(LegalDAG && In.getValueType().isScalableVector())) { ... kosarev: This bit in DAGCombiner.cpp reads like the expectation was that combining legal SDAGs is not…
	}			}
	let OtherPredicates = [HasTrue16BitInsts] in {			let OtherPredicates = [HasTrue16BitInsts] in {
	def : GCNPat<			def : GCNPat<
	(f32 (f16_to_fp i16:$src)),			(f32 (f16_to_fp i16:$src)),
	(V_CVT_F32_F16_t16_e32 $src)			(V_CVT_F32_F16_t16_e32 $src)
	>;			>;
	def : GCNPat<			def : GCNPat<
	(i16 (AMDGPUfp_to_f16 f32:$src)),			(i16 (AMDGPUfp_to_f16 f32:$src)),
	(V_CVT_F16_F32_t16_e32 $src)			(V_CVT_F16_F32_t16_e32 $src)
	>;			>;
				// True16 counterpart of the f64 -> f16 fpround selection: the result of
				// V_CVT_F32_F64_e64 feeds V_CVT_F16_F32_t16_e64. SRCMODS.NONE is passed
				// to both instructions, so no source modifiers are folded by this
				// pattern.
				// NOTE(review): going through f32 rounds twice; presumably this is only
				// meant to be reached when unsafe FP math is enabled -- confirm.
				def : GCNPat<
				(f16 (fpround f64:$src)),
				(V_CVT_F16_F32_t16_e64 SRCMODS.NONE, (V_CVT_F32_F64_e64 SRCMODS.NONE, $src))
				>;
	}			}

	def VOP_SWAP_I32 : VOPProfile<[i32, i32, untyped, untyped]> {			def VOP_SWAP_I32 : VOPProfile<[i32, i32, untyped, untyped]> {
	let Outs32 = (outs VGPR_32:$vdst, VRegSrc_32:$vdst1);			let Outs32 = (outs VGPR_32:$vdst, VRegSrc_32:$vdst1);
	let Ins32 = (ins VRegSrc_32:$src0, VGPR_32:$src1);			let Ins32 = (ins VRegSrc_32:$src0, VGPR_32:$src1);
	let Asm32 = " $vdst, $src0";			let Asm32 = " $vdst, $src0";
	}			}

	▲ Show 20 Lines • Show All 769 Lines • Show Last 20 Lines

llvm/test/CodeGen/AMDGPU/fptrunc.f16.ll

	Show First 20 Lines • Show All 164 Lines • ▼ Show 20 Lines
	; VI-SDAG-NEXT: v_cvt_f32_f64_e32 v0, v[0:1]			; VI-SDAG-NEXT: v_cvt_f32_f64_e32 v0, v[0:1]
	; VI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0			; VI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
	; VI-SDAG-NEXT: buffer_store_short v0, off, s[4:7], 0			; VI-SDAG-NEXT: buffer_store_short v0, off, s[4:7], 0
	; VI-SDAG-NEXT: s_endpgm			; VI-SDAG-NEXT: s_endpgm
	;			;
	; VI-GISEL-LABEL: fptrunc_f64_to_f16:			; VI-GISEL-LABEL: fptrunc_f64_to_f16:
	; VI-GISEL: ; %bb.0: ; %entry			; VI-GISEL: ; %bb.0: ; %entry
	; VI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24			; VI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
	; VI-GISEL-NEXT: s_movk_i32 s4, 0x7e00
	; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0)			; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
	; VI-GISEL-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0			; VI-GISEL-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0
	; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0)			; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
	; VI-GISEL-NEXT: s_bfe_u32 s5, s3, 0xb0014			; VI-GISEL-NEXT: v_cvt_f32_f64_e32 v0, s[2:3]
	; VI-GISEL-NEXT: s_lshr_b32 s6, s3, 8
	; VI-GISEL-NEXT: s_and_b32 s7, s3, 0x1ff
	; VI-GISEL-NEXT: s_addk_i32 s5, 0xfc10
	; VI-GISEL-NEXT: s_and_b32 s6, s6, 0xffe
	; VI-GISEL-NEXT: s_or_b32 s2, s7, s2
	; VI-GISEL-NEXT: s_cmp_lg_u32 s2, 0
	; VI-GISEL-NEXT: s_cselect_b32 s2, 1, 0
	; VI-GISEL-NEXT: s_or_b32 s2, s6, s2
	; VI-GISEL-NEXT: s_cmp_lg_u32 s2, 0
	; VI-GISEL-NEXT: s_cselect_b32 s4, s4, 0x7c00
	; VI-GISEL-NEXT: s_sub_i32 s7, 1, s5
	; VI-GISEL-NEXT: s_lshl_b32 s6, s5, 12
	; VI-GISEL-NEXT: s_max_i32 s7, s7, 0
	; VI-GISEL-NEXT: s_or_b32 s6, s2, s6
	; VI-GISEL-NEXT: s_min_i32 s7, s7, 13
	; VI-GISEL-NEXT: s_bitset1_b32 s2, 12
	; VI-GISEL-NEXT: s_lshr_b32 s8, s2, s7
	; VI-GISEL-NEXT: s_lshl_b32 s7, s8, s7
	; VI-GISEL-NEXT: s_cmp_lg_u32 s7, s2
	; VI-GISEL-NEXT: s_cselect_b32 s2, 1, 0
	; VI-GISEL-NEXT: s_or_b32 s2, s8, s2
	; VI-GISEL-NEXT: s_cmp_lt_i32 s5, 1
	; VI-GISEL-NEXT: s_cselect_b32 s2, s2, s6
	; VI-GISEL-NEXT: s_and_b32 s6, s2, 7
	; VI-GISEL-NEXT: s_lshr_b32 s2, s2, 2
	; VI-GISEL-NEXT: s_cmp_eq_u32 s6, 3
	; VI-GISEL-NEXT: s_cselect_b32 s7, 1, 0
	; VI-GISEL-NEXT: s_cmp_gt_i32 s6, 5
	; VI-GISEL-NEXT: s_cselect_b32 s6, 1, 0
	; VI-GISEL-NEXT: s_or_b32 s6, s7, s6
	; VI-GISEL-NEXT: s_and_b32 s6, s6, 1
	; VI-GISEL-NEXT: s_add_i32 s2, s2, s6
	; VI-GISEL-NEXT: s_cmp_gt_i32 s5, 30
	; VI-GISEL-NEXT: s_cselect_b32 s2, 0x7c00, s2
	; VI-GISEL-NEXT: s_cmpk_eq_i32 s5, 0x40f
	; VI-GISEL-NEXT: s_cselect_b32 s2, s4, s2
	; VI-GISEL-NEXT: s_lshr_b32 s3, s3, 16
	; VI-GISEL-NEXT: s_and_b32 s3, s3, 0x8000
	; VI-GISEL-NEXT: s_or_b32 s2, s3, s2
	; VI-GISEL-NEXT: v_mov_b32_e32 v0, s2
	; VI-GISEL-NEXT: s_mov_b32 s2, -1			; VI-GISEL-NEXT: s_mov_b32 s2, -1
	; VI-GISEL-NEXT: s_mov_b32 s3, 0xf000			; VI-GISEL-NEXT: s_mov_b32 s3, 0xf000
				; VI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
	; VI-GISEL-NEXT: buffer_store_short v0, off, s[0:3], 0			; VI-GISEL-NEXT: buffer_store_short v0, off, s[0:3], 0
	; VI-GISEL-NEXT: s_endpgm			; VI-GISEL-NEXT: s_endpgm
	;			;
	; GFX9-SDAG-LABEL: fptrunc_f64_to_f16:			; GFX9-SDAG-LABEL: fptrunc_f64_to_f16:
	; GFX9-SDAG: ; %bb.0: ; %entry			; GFX9-SDAG: ; %bb.0: ; %entry
	; GFX9-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24			; GFX9-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
	; GFX9-SDAG-NEXT: s_mov_b32 s7, 0xf000			; GFX9-SDAG-NEXT: s_mov_b32 s7, 0xf000
	; GFX9-SDAG-NEXT: s_mov_b32 s6, -1			; GFX9-SDAG-NEXT: s_mov_b32 s6, -1
	Show All 9 Lines
	; GFX9-SDAG-NEXT: v_cvt_f32_f64_e32 v0, v[0:1]			; GFX9-SDAG-NEXT: v_cvt_f32_f64_e32 v0, v[0:1]
	; GFX9-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0			; GFX9-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
	; GFX9-SDAG-NEXT: buffer_store_short v0, off, s[4:7], 0			; GFX9-SDAG-NEXT: buffer_store_short v0, off, s[4:7], 0
	; GFX9-SDAG-NEXT: s_endpgm			; GFX9-SDAG-NEXT: s_endpgm
	;			;
	; GFX9-GISEL-LABEL: fptrunc_f64_to_f16:			; GFX9-GISEL-LABEL: fptrunc_f64_to_f16:
	; GFX9-GISEL: ; %bb.0: ; %entry			; GFX9-GISEL: ; %bb.0: ; %entry
	; GFX9-GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24			; GFX9-GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
	; GFX9-GISEL-NEXT: s_movk_i32 s4, 0x7e00
	; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0)			; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0)
	; GFX9-GISEL-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0			; GFX9-GISEL-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0
	; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0)			; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0)
	; GFX9-GISEL-NEXT: s_bfe_u32 s5, s3, 0xb0014			; GFX9-GISEL-NEXT: v_cvt_f32_f64_e32 v0, s[2:3]
	; GFX9-GISEL-NEXT: s_lshr_b32 s6, s3, 8
	; GFX9-GISEL-NEXT: s_and_b32 s7, s3, 0x1ff
	; GFX9-GISEL-NEXT: s_addk_i32 s5, 0xfc10
	; GFX9-GISEL-NEXT: s_and_b32 s6, s6, 0xffe
	; GFX9-GISEL-NEXT: s_or_b32 s2, s7, s2
	; GFX9-GISEL-NEXT: s_cmp_lg_u32 s2, 0
	; GFX9-GISEL-NEXT: s_cselect_b32 s2, 1, 0
	; GFX9-GISEL-NEXT: s_or_b32 s2, s6, s2
	; GFX9-GISEL-NEXT: s_cmp_lg_u32 s2, 0
	; GFX9-GISEL-NEXT: s_cselect_b32 s4, s4, 0x7c00
	; GFX9-GISEL-NEXT: s_sub_i32 s7, 1, s5
	; GFX9-GISEL-NEXT: s_lshl_b32 s6, s5, 12
	; GFX9-GISEL-NEXT: s_max_i32 s7, s7, 0
	; GFX9-GISEL-NEXT: s_or_b32 s6, s2, s6
	; GFX9-GISEL-NEXT: s_min_i32 s7, s7, 13
	; GFX9-GISEL-NEXT: s_bitset1_b32 s2, 12
	; GFX9-GISEL-NEXT: s_lshr_b32 s8, s2, s7
	; GFX9-GISEL-NEXT: s_lshl_b32 s7, s8, s7
	; GFX9-GISEL-NEXT: s_cmp_lg_u32 s7, s2
	; GFX9-GISEL-NEXT: s_cselect_b32 s2, 1, 0
	; GFX9-GISEL-NEXT: s_or_b32 s2, s8, s2
	; GFX9-GISEL-NEXT: s_cmp_lt_i32 s5, 1
	; GFX9-GISEL-NEXT: s_cselect_b32 s2, s2, s6
	; GFX9-GISEL-NEXT: s_and_b32 s6, s2, 7
	; GFX9-GISEL-NEXT: s_lshr_b32 s2, s2, 2
	; GFX9-GISEL-NEXT: s_cmp_eq_u32 s6, 3
	; GFX9-GISEL-NEXT: s_cselect_b32 s7, 1, 0
	; GFX9-GISEL-NEXT: s_cmp_gt_i32 s6, 5
	; GFX9-GISEL-NEXT: s_cselect_b32 s6, 1, 0
	; GFX9-GISEL-NEXT: s_or_b32 s6, s7, s6
	; GFX9-GISEL-NEXT: s_and_b32 s6, s6, 1
	; GFX9-GISEL-NEXT: s_add_i32 s2, s2, s6
	; GFX9-GISEL-NEXT: s_cmp_gt_i32 s5, 30
	; GFX9-GISEL-NEXT: s_cselect_b32 s2, 0x7c00, s2
	; GFX9-GISEL-NEXT: s_cmpk_eq_i32 s5, 0x40f
	; GFX9-GISEL-NEXT: s_cselect_b32 s2, s4, s2
	; GFX9-GISEL-NEXT: s_lshr_b32 s3, s3, 16
	; GFX9-GISEL-NEXT: s_and_b32 s3, s3, 0x8000
	; GFX9-GISEL-NEXT: s_or_b32 s2, s3, s2
	; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, s2
	; GFX9-GISEL-NEXT: s_mov_b32 s2, -1			; GFX9-GISEL-NEXT: s_mov_b32 s2, -1
	; GFX9-GISEL-NEXT: s_mov_b32 s3, 0xf000			; GFX9-GISEL-NEXT: s_mov_b32 s3, 0xf000
				; GFX9-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
	; GFX9-GISEL-NEXT: buffer_store_short v0, off, s[0:3], 0			; GFX9-GISEL-NEXT: buffer_store_short v0, off, s[0:3], 0
	; GFX9-GISEL-NEXT: s_endpgm			; GFX9-GISEL-NEXT: s_endpgm
	;			;
	; GFX11-SDAG-LABEL: fptrunc_f64_to_f16:			; GFX11-SDAG-LABEL: fptrunc_f64_to_f16:
	; GFX11-SDAG: ; %bb.0: ; %entry			; GFX11-SDAG: ; %bb.0: ; %entry
	; GFX11-SDAG-NEXT: s_load_b128 s[0:3], s[0:1], 0x24			; GFX11-SDAG-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
	; GFX11-SDAG-NEXT: s_mov_b32 s6, -1			; GFX11-SDAG-NEXT: s_mov_b32 s6, -1
	; GFX11-SDAG-NEXT: s_mov_b32 s7, 0x31016000			; GFX11-SDAG-NEXT: s_mov_b32 s7, 0x31016000
	Show All 14 Lines
	; GFX11-SDAG-NEXT: s_endpgm			; GFX11-SDAG-NEXT: s_endpgm
	;			;
	; GFX11-GISEL-LABEL: fptrunc_f64_to_f16:			; GFX11-GISEL-LABEL: fptrunc_f64_to_f16:
	; GFX11-GISEL: ; %bb.0: ; %entry			; GFX11-GISEL: ; %bb.0: ; %entry
	; GFX11-GISEL-NEXT: s_load_b128 s[0:3], s[0:1], 0x24			; GFX11-GISEL-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
	; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)			; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
	; GFX11-GISEL-NEXT: s_load_b64 s[2:3], s[2:3], 0x0			; GFX11-GISEL-NEXT: s_load_b64 s[2:3], s[2:3], 0x0
	; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)			; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
	; GFX11-GISEL-NEXT: s_and_b32 s6, s3, 0x1ff			; GFX11-GISEL-NEXT: v_cvt_f32_f64_e32 v0, s[2:3]
	; GFX11-GISEL-NEXT: s_bfe_u32 s4, s3, 0xb0014
	; GFX11-GISEL-NEXT: s_lshr_b32 s5, s3, 8
	; GFX11-GISEL-NEXT: s_or_b32 s2, s6, s2
	; GFX11-GISEL-NEXT: s_addk_i32 s4, 0xfc10
	; GFX11-GISEL-NEXT: s_and_b32 s5, s5, 0xffe
	; GFX11-GISEL-NEXT: s_cmp_lg_u32 s2, 0
	; GFX11-GISEL-NEXT: s_cselect_b32 s2, 1, 0
	; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
	; GFX11-GISEL-NEXT: s_or_b32 s2, s5, s2
	; GFX11-GISEL-NEXT: s_movk_i32 s5, 0x7e00
	; GFX11-GISEL-NEXT: s_cmp_lg_u32 s2, 0
	; GFX11-GISEL-NEXT: s_cselect_b32 s5, s5, 0x7c00
	; GFX11-GISEL-NEXT: s_sub_i32 s6, 1, s4
	; GFX11-GISEL-NEXT: s_or_b32 s7, s2, 0x1000
	; GFX11-GISEL-NEXT: s_max_i32 s6, s6, 0
	; GFX11-GISEL-NEXT: s_lshl_b32 s9, s4, 12
	; GFX11-GISEL-NEXT: s_min_i32 s6, s6, 13
	; GFX11-GISEL-NEXT: s_or_b32 s2, s2, s9
	; GFX11-GISEL-NEXT: s_lshr_b32 s8, s7, s6
	; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) \| instskip(NEXT) \| instid1(SALU_CYCLE_1)
	; GFX11-GISEL-NEXT: s_lshl_b32 s6, s8, s6
	; GFX11-GISEL-NEXT: s_cmp_lg_u32 s6, s7
	; GFX11-GISEL-NEXT: s_cselect_b32 s6, 1, 0
	; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) \| instskip(SKIP_2) \| instid1(SALU_CYCLE_1)
	; GFX11-GISEL-NEXT: s_or_b32 s6, s8, s6
	; GFX11-GISEL-NEXT: s_cmp_lt_i32 s4, 1
	; GFX11-GISEL-NEXT: s_cselect_b32 s2, s6, s2
	; GFX11-GISEL-NEXT: s_and_b32 s6, s2, 7
	; GFX11-GISEL-NEXT: s_lshr_b32 s2, s2, 2
	; GFX11-GISEL-NEXT: s_cmp_eq_u32 s6, 3
	; GFX11-GISEL-NEXT: s_cselect_b32 s7, 1, 0
	; GFX11-GISEL-NEXT: s_cmp_gt_i32 s6, 5
	; GFX11-GISEL-NEXT: s_cselect_b32 s6, 1, 0
	; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) \| instskip(NEXT) \| instid1(SALU_CYCLE_1)
	; GFX11-GISEL-NEXT: s_or_b32 s6, s7, s6
	; GFX11-GISEL-NEXT: s_and_b32 s6, s6, 1
	; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
	; GFX11-GISEL-NEXT: s_add_i32 s2, s2, s6
	; GFX11-GISEL-NEXT: s_cmp_gt_i32 s4, 30
	; GFX11-GISEL-NEXT: s_cselect_b32 s2, 0x7c00, s2
	; GFX11-GISEL-NEXT: s_cmpk_eq_i32 s4, 0x40f
	; GFX11-GISEL-NEXT: s_cselect_b32 s2, s5, s2
	; GFX11-GISEL-NEXT: s_lshr_b32 s3, s3, 16
	; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) \| instskip(NEXT) \| instid1(SALU_CYCLE_1)
	; GFX11-GISEL-NEXT: s_and_b32 s3, s3, 0x8000
	; GFX11-GISEL-NEXT: s_or_b32 s2, s3, s2
	; GFX11-GISEL-NEXT: s_mov_b32 s3, 0x31016000
	; GFX11-GISEL-NEXT: v_mov_b32_e32 v0, s2
	; GFX11-GISEL-NEXT: s_mov_b32 s2, -1			; GFX11-GISEL-NEXT: s_mov_b32 s2, -1
				; GFX11-GISEL-NEXT: s_mov_b32 s3, 0x31016000
				; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
				; GFX11-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
	; GFX11-GISEL-NEXT: buffer_store_b16 v0, off, s[0:3], 0			; GFX11-GISEL-NEXT: buffer_store_b16 v0, off, s[0:3], 0
	; GFX11-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)			; GFX11-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
	; GFX11-GISEL-NEXT: s_endpgm			; GFX11-GISEL-NEXT: s_endpgm
	ptr addrspace(1) %r,			ptr addrspace(1) %r,
	ptr addrspace(1) %a) {			ptr addrspace(1) %a) {
	entry:			entry:
	%a.val = load double, ptr addrspace(1) %a			%a.val = load double, ptr addrspace(1) %a
	%r.val = fptrunc double %a.val to half			%r.val = fptrunc double %a.val to half
	▲ Show 20 Lines • Show All 182 Lines • ▼ Show 20 Lines
	; VI-SDAG-NEXT: buffer_store_dword v0, off, s[4:7], 0			; VI-SDAG-NEXT: buffer_store_dword v0, off, s[4:7], 0
	; VI-SDAG-NEXT: s_endpgm			; VI-SDAG-NEXT: s_endpgm
	;			;
	; VI-GISEL-LABEL: fptrunc_v2f64_to_v2f16:			; VI-GISEL-LABEL: fptrunc_v2f64_to_v2f16:
	; VI-GISEL: ; %bb.0: ; %entry			; VI-GISEL: ; %bb.0: ; %entry
	; VI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24			; VI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
	; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0)			; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
	; VI-GISEL-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x0			; VI-GISEL-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x0
	; VI-GISEL-NEXT: s_movk_i32 s2, 0x7e00			; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
				; VI-GISEL-NEXT: v_cvt_f32_f64_e32 v0, s[4:5]
				; VI-GISEL-NEXT: v_cvt_f32_f64_e32 v1, s[6:7]
				; VI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
				; VI-GISEL-NEXT: v_cvt_f16_f32_sdwa v1, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
				; VI-GISEL-NEXT: v_or_b32_e32 v2, v0, v1
	; VI-GISEL-NEXT: v_mov_b32_e32 v0, s0			; VI-GISEL-NEXT: v_mov_b32_e32 v0, s0
	; VI-GISEL-NEXT: v_mov_b32_e32 v1, s1			; VI-GISEL-NEXT: v_mov_b32_e32 v1, s1
	; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
	; VI-GISEL-NEXT: s_bfe_u32 s3, s5, 0xb0014
	; VI-GISEL-NEXT: s_lshr_b32 s8, s5, 8
	; VI-GISEL-NEXT: s_and_b32 s9, s5, 0x1ff
	; VI-GISEL-NEXT: s_addk_i32 s3, 0xfc10
	; VI-GISEL-NEXT: s_and_b32 s8, s8, 0xffe
	; VI-GISEL-NEXT: s_or_b32 s4, s9, s4
	; VI-GISEL-NEXT: s_cmp_lg_u32 s4, 0
	; VI-GISEL-NEXT: s_cselect_b32 s4, 1, 0
	; VI-GISEL-NEXT: s_or_b32 s4, s8, s4
	; VI-GISEL-NEXT: s_cmp_lg_u32 s4, 0
	; VI-GISEL-NEXT: s_cselect_b32 s8, s2, 0x7c00
	; VI-GISEL-NEXT: s_sub_i32 s10, 1, s3
	; VI-GISEL-NEXT: s_lshl_b32 s9, s3, 12
	; VI-GISEL-NEXT: s_max_i32 s10, s10, 0
	; VI-GISEL-NEXT: s_or_b32 s9, s4, s9
	; VI-GISEL-NEXT: s_min_i32 s10, s10, 13
	; VI-GISEL-NEXT: s_bitset1_b32 s4, 12
	; VI-GISEL-NEXT: s_lshr_b32 s11, s4, s10
	; VI-GISEL-NEXT: s_lshl_b32 s10, s11, s10
	; VI-GISEL-NEXT: s_cmp_lg_u32 s10, s4
	; VI-GISEL-NEXT: s_cselect_b32 s4, 1, 0
	; VI-GISEL-NEXT: s_or_b32 s4, s11, s4
	; VI-GISEL-NEXT: s_cmp_lt_i32 s3, 1
	; VI-GISEL-NEXT: s_cselect_b32 s4, s4, s9
	; VI-GISEL-NEXT: s_and_b32 s9, s4, 7
	; VI-GISEL-NEXT: s_lshr_b32 s4, s4, 2
	; VI-GISEL-NEXT: s_cmp_eq_u32 s9, 3
	; VI-GISEL-NEXT: s_cselect_b32 s10, 1, 0
	; VI-GISEL-NEXT: s_cmp_gt_i32 s9, 5
	; VI-GISEL-NEXT: s_cselect_b32 s9, 1, 0
	; VI-GISEL-NEXT: s_or_b32 s9, s10, s9
	; VI-GISEL-NEXT: s_and_b32 s9, s9, 1
	; VI-GISEL-NEXT: s_add_i32 s4, s4, s9
	; VI-GISEL-NEXT: s_cmp_gt_i32 s3, 30
	; VI-GISEL-NEXT: s_cselect_b32 s4, 0x7c00, s4
	; VI-GISEL-NEXT: s_cmpk_eq_i32 s3, 0x40f
	; VI-GISEL-NEXT: s_cselect_b32 s3, s8, s4
	; VI-GISEL-NEXT: s_lshr_b32 s4, s5, 16
	; VI-GISEL-NEXT: s_and_b32 s4, s4, 0x8000
	; VI-GISEL-NEXT: s_or_b32 s3, s4, s3
	; VI-GISEL-NEXT: s_bfe_u32 s4, s7, 0xb0014
	; VI-GISEL-NEXT: s_lshr_b32 s5, s7, 8
	; VI-GISEL-NEXT: s_and_b32 s8, s7, 0x1ff
	; VI-GISEL-NEXT: s_addk_i32 s4, 0xfc10
	; VI-GISEL-NEXT: s_and_b32 s5, s5, 0xffe
	; VI-GISEL-NEXT: s_or_b32 s6, s8, s6
	; VI-GISEL-NEXT: s_cmp_lg_u32 s6, 0
	; VI-GISEL-NEXT: s_cselect_b32 s6, 1, 0
	; VI-GISEL-NEXT: s_or_b32 s5, s5, s6
	; VI-GISEL-NEXT: s_cmp_lg_u32 s5, 0
	; VI-GISEL-NEXT: s_cselect_b32 s2, s2, 0x7c00
	; VI-GISEL-NEXT: s_sub_i32 s8, 1, s4
	; VI-GISEL-NEXT: s_lshl_b32 s6, s4, 12
	; VI-GISEL-NEXT: s_max_i32 s8, s8, 0
	; VI-GISEL-NEXT: s_or_b32 s6, s5, s6
	; VI-GISEL-NEXT: s_min_i32 s8, s8, 13
	; VI-GISEL-NEXT: s_bitset1_b32 s5, 12
	; VI-GISEL-NEXT: s_lshr_b32 s9, s5, s8
	; VI-GISEL-NEXT: s_lshl_b32 s8, s9, s8
	; VI-GISEL-NEXT: s_cmp_lg_u32 s8, s5
	; VI-GISEL-NEXT: s_cselect_b32 s5, 1, 0
	; VI-GISEL-NEXT: s_or_b32 s5, s9, s5
	; VI-GISEL-NEXT: s_cmp_lt_i32 s4, 1
	; VI-GISEL-NEXT: s_cselect_b32 s5, s5, s6
	; VI-GISEL-NEXT: s_and_b32 s6, s5, 7
	; VI-GISEL-NEXT: s_lshr_b32 s5, s5, 2
	; VI-GISEL-NEXT: s_cmp_eq_u32 s6, 3
	; VI-GISEL-NEXT: s_cselect_b32 s8, 1, 0
	; VI-GISEL-NEXT: s_cmp_gt_i32 s6, 5
	; VI-GISEL-NEXT: s_cselect_b32 s6, 1, 0
	; VI-GISEL-NEXT: s_or_b32 s6, s8, s6
	; VI-GISEL-NEXT: s_and_b32 s6, s6, 1
	; VI-GISEL-NEXT: s_add_i32 s5, s5, s6
	; VI-GISEL-NEXT: s_cmp_gt_i32 s4, 30
	; VI-GISEL-NEXT: s_cselect_b32 s5, 0x7c00, s5
	; VI-GISEL-NEXT: s_cmpk_eq_i32 s4, 0x40f
	; VI-GISEL-NEXT: s_cselect_b32 s2, s2, s5
	; VI-GISEL-NEXT: s_lshr_b32 s4, s7, 16
	; VI-GISEL-NEXT: s_and_b32 s4, s4, 0x8000
	; VI-GISEL-NEXT: s_or_b32 s2, s4, s2
	; VI-GISEL-NEXT: s_and_b32 s2, s2, 0xffff
	; VI-GISEL-NEXT: s_and_b32 s3, s3, 0xffff
	; VI-GISEL-NEXT: s_lshl_b32 s2, s2, 16
	; VI-GISEL-NEXT: s_or_b32 s2, s3, s2
	; VI-GISEL-NEXT: v_mov_b32_e32 v2, s2
	; VI-GISEL-NEXT: flat_store_dword v[0:1], v2			; VI-GISEL-NEXT: flat_store_dword v[0:1], v2
	; VI-GISEL-NEXT: s_endpgm			; VI-GISEL-NEXT: s_endpgm
	;			;
	; GFX9-SDAG-LABEL: fptrunc_v2f64_to_v2f16:			; GFX9-SDAG-LABEL: fptrunc_v2f64_to_v2f16:
	; GFX9-SDAG: ; %bb.0: ; %entry			; GFX9-SDAG: ; %bb.0: ; %entry
	; GFX9-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24			; GFX9-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
	; GFX9-SDAG-NEXT: s_mov_b32 s7, 0xf000			; GFX9-SDAG-NEXT: s_mov_b32 s7, 0xf000
	; GFX9-SDAG-NEXT: s_mov_b32 s6, -1			; GFX9-SDAG-NEXT: s_mov_b32 s6, -1
	Show All 12 Lines
	; GFX9-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0			; GFX9-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
	; GFX9-SDAG-NEXT: v_lshl_or_b32 v0, v1, 16, v0			; GFX9-SDAG-NEXT: v_lshl_or_b32 v0, v1, 16, v0
	; GFX9-SDAG-NEXT: buffer_store_dword v0, off, s[4:7], 0			; GFX9-SDAG-NEXT: buffer_store_dword v0, off, s[4:7], 0
	; GFX9-SDAG-NEXT: s_endpgm			; GFX9-SDAG-NEXT: s_endpgm
	;			;
	; GFX9-GISEL-LABEL: fptrunc_v2f64_to_v2f16:			; GFX9-GISEL-LABEL: fptrunc_v2f64_to_v2f16:
	; GFX9-GISEL: ; %bb.0: ; %entry			; GFX9-GISEL: ; %bb.0: ; %entry
	; GFX9-GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24			; GFX9-GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
	; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, 0
	; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0)			; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0)
	; GFX9-GISEL-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x0			; GFX9-GISEL-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x0
	; GFX9-GISEL-NEXT: s_movk_i32 s2, 0x7e00
	; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0)			; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0)
	; GFX9-GISEL-NEXT: s_bfe_u32 s3, s5, 0xb0014			; GFX9-GISEL-NEXT: v_cvt_f32_f64_e32 v0, s[4:5]
	; GFX9-GISEL-NEXT: s_lshr_b32 s8, s5, 8			; GFX9-GISEL-NEXT: v_cvt_f32_f64_e32 v1, s[6:7]
	; GFX9-GISEL-NEXT: s_and_b32 s9, s5, 0x1ff			; GFX9-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
	; GFX9-GISEL-NEXT: s_addk_i32 s3, 0xfc10			; GFX9-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1
	; GFX9-GISEL-NEXT: s_and_b32 s8, s8, 0xffe			; GFX9-GISEL-NEXT: v_pack_b32_f16 v0, v0, v1
	; GFX9-GISEL-NEXT: s_or_b32 s4, s9, s4			; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, 0
	; GFX9-GISEL-NEXT: s_cmp_lg_u32 s4, 0
	; GFX9-GISEL-NEXT: s_cselect_b32 s4, 1, 0
	; GFX9-GISEL-NEXT: s_or_b32 s4, s8, s4
	; GFX9-GISEL-NEXT: s_cmp_lg_u32 s4, 0
	; GFX9-GISEL-NEXT: s_cselect_b32 s8, s2, 0x7c00
	; GFX9-GISEL-NEXT: s_sub_i32 s10, 1, s3
	; GFX9-GISEL-NEXT: s_lshl_b32 s9, s3, 12
	; GFX9-GISEL-NEXT: s_max_i32 s10, s10, 0
	; GFX9-GISEL-NEXT: s_or_b32 s9, s4, s9
	; GFX9-GISEL-NEXT: s_min_i32 s10, s10, 13
	; GFX9-GISEL-NEXT: s_bitset1_b32 s4, 12
	; GFX9-GISEL-NEXT: s_lshr_b32 s11, s4, s10
	; GFX9-GISEL-NEXT: s_lshl_b32 s10, s11, s10
	; GFX9-GISEL-NEXT: s_cmp_lg_u32 s10, s4
	; GFX9-GISEL-NEXT: s_cselect_b32 s4, 1, 0
	; GFX9-GISEL-NEXT: s_or_b32 s4, s11, s4
	; GFX9-GISEL-NEXT: s_cmp_lt_i32 s3, 1
	; GFX9-GISEL-NEXT: s_cselect_b32 s4, s4, s9
	; GFX9-GISEL-NEXT: s_and_b32 s9, s4, 7
	; GFX9-GISEL-NEXT: s_lshr_b32 s4, s4, 2
	; GFX9-GISEL-NEXT: s_cmp_eq_u32 s9, 3
	; GFX9-GISEL-NEXT: s_cselect_b32 s10, 1, 0
	; GFX9-GISEL-NEXT: s_cmp_gt_i32 s9, 5
	; GFX9-GISEL-NEXT: s_cselect_b32 s9, 1, 0
	; GFX9-GISEL-NEXT: s_or_b32 s9, s10, s9
	; GFX9-GISEL-NEXT: s_and_b32 s9, s9, 1
	; GFX9-GISEL-NEXT: s_add_i32 s4, s4, s9
	; GFX9-GISEL-NEXT: s_cmp_gt_i32 s3, 30
	; GFX9-GISEL-NEXT: s_cselect_b32 s4, 0x7c00, s4
	; GFX9-GISEL-NEXT: s_cmpk_eq_i32 s3, 0x40f
	; GFX9-GISEL-NEXT: s_cselect_b32 s3, s8, s4
	; GFX9-GISEL-NEXT: s_lshr_b32 s4, s5, 16
	; GFX9-GISEL-NEXT: s_and_b32 s4, s4, 0x8000
	; GFX9-GISEL-NEXT: s_or_b32 s3, s4, s3
	; GFX9-GISEL-NEXT: s_bfe_u32 s4, s7, 0xb0014
	; GFX9-GISEL-NEXT: s_lshr_b32 s5, s7, 8
	; GFX9-GISEL-NEXT: s_and_b32 s8, s7, 0x1ff
	; GFX9-GISEL-NEXT: s_addk_i32 s4, 0xfc10
	; GFX9-GISEL-NEXT: s_and_b32 s5, s5, 0xffe
	; GFX9-GISEL-NEXT: s_or_b32 s6, s8, s6
	; GFX9-GISEL-NEXT: s_cmp_lg_u32 s6, 0
	; GFX9-GISEL-NEXT: s_cselect_b32 s6, 1, 0
	; GFX9-GISEL-NEXT: s_or_b32 s5, s5, s6
	; GFX9-GISEL-NEXT: s_cmp_lg_u32 s5, 0
	; GFX9-GISEL-NEXT: s_cselect_b32 s2, s2, 0x7c00
	; GFX9-GISEL-NEXT: s_sub_i32 s8, 1, s4
	; GFX9-GISEL-NEXT: s_lshl_b32 s6, s4, 12
	; GFX9-GISEL-NEXT: s_max_i32 s8, s8, 0
	; GFX9-GISEL-NEXT: s_or_b32 s6, s5, s6
	; GFX9-GISEL-NEXT: s_min_i32 s8, s8, 13
	; GFX9-GISEL-NEXT: s_bitset1_b32 s5, 12
	; GFX9-GISEL-NEXT: s_lshr_b32 s9, s5, s8
	; GFX9-GISEL-NEXT: s_lshl_b32 s8, s9, s8
	; GFX9-GISEL-NEXT: s_cmp_lg_u32 s8, s5
	; GFX9-GISEL-NEXT: s_cselect_b32 s5, 1, 0
	; GFX9-GISEL-NEXT: s_or_b32 s5, s9, s5
	; GFX9-GISEL-NEXT: s_cmp_lt_i32 s4, 1
	; GFX9-GISEL-NEXT: s_cselect_b32 s5, s5, s6
	; GFX9-GISEL-NEXT: s_and_b32 s6, s5, 7
	; GFX9-GISEL-NEXT: s_lshr_b32 s5, s5, 2
	; GFX9-GISEL-NEXT: s_cmp_eq_u32 s6, 3
	; GFX9-GISEL-NEXT: s_cselect_b32 s8, 1, 0
	; GFX9-GISEL-NEXT: s_cmp_gt_i32 s6, 5
	; GFX9-GISEL-NEXT: s_cselect_b32 s6, 1, 0
	; GFX9-GISEL-NEXT: s_or_b32 s6, s8, s6
	; GFX9-GISEL-NEXT: s_and_b32 s6, s6, 1
	; GFX9-GISEL-NEXT: s_add_i32 s5, s5, s6
	; GFX9-GISEL-NEXT: s_cmp_gt_i32 s4, 30
	; GFX9-GISEL-NEXT: s_cselect_b32 s5, 0x7c00, s5
	; GFX9-GISEL-NEXT: s_cmpk_eq_i32 s4, 0x40f
	; GFX9-GISEL-NEXT: s_cselect_b32 s2, s2, s5
	; GFX9-GISEL-NEXT: s_lshr_b32 s4, s7, 16
	; GFX9-GISEL-NEXT: s_and_b32 s4, s4, 0x8000
	; GFX9-GISEL-NEXT: s_or_b32 s2, s4, s2
	; GFX9-GISEL-NEXT: s_pack_ll_b32_b16 s2, s3, s2
	; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, s2
	; GFX9-GISEL-NEXT: global_store_dword v1, v0, s[0:1]			; GFX9-GISEL-NEXT: global_store_dword v1, v0, s[0:1]
	; GFX9-GISEL-NEXT: s_endpgm			; GFX9-GISEL-NEXT: s_endpgm
	;			;
	; GFX11-SDAG-LABEL: fptrunc_v2f64_to_v2f16:			; GFX11-SDAG-LABEL: fptrunc_v2f64_to_v2f16:
	; GFX11-SDAG: ; %bb.0: ; %entry			; GFX11-SDAG: ; %bb.0: ; %entry
	; GFX11-SDAG-NEXT: s_load_b128 s[0:3], s[0:1], 0x24			; GFX11-SDAG-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
	; GFX11-SDAG-NEXT: s_mov_b32 s6, -1			; GFX11-SDAG-NEXT: s_mov_b32 s6, -1
	; GFX11-SDAG-NEXT: s_mov_b32 s7, 0x31016000			; GFX11-SDAG-NEXT: s_mov_b32 s7, 0x31016000
	Show All 16 Lines
	; GFX11-SDAG-NEXT: v_lshl_or_b32 v0, v1, 16, v0			; GFX11-SDAG-NEXT: v_lshl_or_b32 v0, v1, 16, v0
	; GFX11-SDAG-NEXT: buffer_store_b32 v0, off, s[4:7], 0			; GFX11-SDAG-NEXT: buffer_store_b32 v0, off, s[4:7], 0
	; GFX11-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)			; GFX11-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
	; GFX11-SDAG-NEXT: s_endpgm			; GFX11-SDAG-NEXT: s_endpgm
	;			;
	; GFX11-GISEL-LABEL: fptrunc_v2f64_to_v2f16:			; GFX11-GISEL-LABEL: fptrunc_v2f64_to_v2f16:
	; GFX11-GISEL: ; %bb.0: ; %entry			; GFX11-GISEL: ; %bb.0: ; %entry
	; GFX11-GISEL-NEXT: s_load_b128 s[0:3], s[0:1], 0x24			; GFX11-GISEL-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
	; GFX11-GISEL-NEXT: v_mov_b32_e32 v1, 0
	; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)			; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
	; GFX11-GISEL-NEXT: s_load_b128 s[4:7], s[2:3], 0x0			; GFX11-GISEL-NEXT: s_load_b128 s[4:7], s[2:3], 0x0
	; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)			; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
	; GFX11-GISEL-NEXT: s_and_b32 s8, s5, 0x1ff			; GFX11-GISEL-NEXT: v_cvt_f32_f64_e32 v0, s[4:5]
	; GFX11-GISEL-NEXT: s_bfe_u32 s2, s5, 0xb0014			; GFX11-GISEL-NEXT: v_cvt_f32_f64_e32 v1, s[6:7]
	; GFX11-GISEL-NEXT: s_lshr_b32 s3, s5, 8			; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) \| instskip(NEXT) \| instid1(VALU_DEP_2)
	; GFX11-GISEL-NEXT: s_or_b32 s4, s8, s4			; GFX11-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
	; GFX11-GISEL-NEXT: s_addk_i32 s2, 0xfc10			; GFX11-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1
	; GFX11-GISEL-NEXT: s_and_b32 s3, s3, 0xffe			; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
	; GFX11-GISEL-NEXT: s_cmp_lg_u32 s4, 0			; GFX11-GISEL-NEXT: v_pack_b32_f16 v0, v0, v1
	; GFX11-GISEL-NEXT: s_cselect_b32 s4, 1, 0			; GFX11-GISEL-NEXT: v_mov_b32_e32 v1, 0
	; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
	; GFX11-GISEL-NEXT: s_or_b32 s3, s3, s4
	; GFX11-GISEL-NEXT: s_movk_i32 s4, 0x7e00
	; GFX11-GISEL-NEXT: s_cmp_lg_u32 s3, 0
	; GFX11-GISEL-NEXT: s_cselect_b32 s8, s4, 0x7c00
	; GFX11-GISEL-NEXT: s_sub_i32 s9, 1, s2
	; GFX11-GISEL-NEXT: s_or_b32 s10, s3, 0x1000
	; GFX11-GISEL-NEXT: s_max_i32 s9, s9, 0
	; GFX11-GISEL-NEXT: s_lshl_b32 s12, s2, 12
	; GFX11-GISEL-NEXT: s_min_i32 s9, s9, 13
	; GFX11-GISEL-NEXT: s_or_b32 s3, s3, s12
	; GFX11-GISEL-NEXT: s_lshr_b32 s11, s10, s9
	; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) \| instskip(NEXT) \| instid1(SALU_CYCLE_1)
	; GFX11-GISEL-NEXT: s_lshl_b32 s9, s11, s9
	; GFX11-GISEL-NEXT: s_cmp_lg_u32 s9, s10
	; GFX11-GISEL-NEXT: s_cselect_b32 s9, 1, 0
	; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) \| instskip(SKIP_2) \| instid1(SALU_CYCLE_1)
	; GFX11-GISEL-NEXT: s_or_b32 s9, s11, s9
	; GFX11-GISEL-NEXT: s_cmp_lt_i32 s2, 1
	; GFX11-GISEL-NEXT: s_cselect_b32 s3, s9, s3
	; GFX11-GISEL-NEXT: s_and_b32 s9, s3, 7
	; GFX11-GISEL-NEXT: s_lshr_b32 s3, s3, 2
	; GFX11-GISEL-NEXT: s_cmp_eq_u32 s9, 3
	; GFX11-GISEL-NEXT: s_cselect_b32 s10, 1, 0
	; GFX11-GISEL-NEXT: s_cmp_gt_i32 s9, 5
	; GFX11-GISEL-NEXT: s_cselect_b32 s9, 1, 0
	; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) \| instskip(NEXT) \| instid1(SALU_CYCLE_1)
	; GFX11-GISEL-NEXT: s_or_b32 s9, s10, s9
	; GFX11-GISEL-NEXT: s_and_b32 s9, s9, 1
	; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
	; GFX11-GISEL-NEXT: s_add_i32 s3, s3, s9
	; GFX11-GISEL-NEXT: s_cmp_gt_i32 s2, 30
	; GFX11-GISEL-NEXT: s_cselect_b32 s3, 0x7c00, s3
	; GFX11-GISEL-NEXT: s_cmpk_eq_i32 s2, 0x40f
	; GFX11-GISEL-NEXT: s_cselect_b32 s2, s8, s3
	; GFX11-GISEL-NEXT: s_lshr_b32 s3, s5, 16
	; GFX11-GISEL-NEXT: s_and_b32 s9, s7, 0x1ff
	; GFX11-GISEL-NEXT: s_bfe_u32 s5, s7, 0xb0014
	; GFX11-GISEL-NEXT: s_lshr_b32 s8, s7, 8
	; GFX11-GISEL-NEXT: s_and_b32 s3, s3, 0x8000
	; GFX11-GISEL-NEXT: s_or_b32 s6, s9, s6
	; GFX11-GISEL-NEXT: s_addk_i32 s5, 0xfc10
	; GFX11-GISEL-NEXT: s_and_b32 s8, s8, 0xffe
	; GFX11-GISEL-NEXT: s_or_b32 s2, s3, s2
	; GFX11-GISEL-NEXT: s_cmp_lg_u32 s6, 0
	; GFX11-GISEL-NEXT: s_cselect_b32 s3, 1, 0
	; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) \| instskip(NEXT) \| instid1(SALU_CYCLE_1)
	; GFX11-GISEL-NEXT: s_or_b32 s3, s8, s3
	; GFX11-GISEL-NEXT: s_cmp_lg_u32 s3, 0
	; GFX11-GISEL-NEXT: s_cselect_b32 s4, s4, 0x7c00
	; GFX11-GISEL-NEXT: s_sub_i32 s6, 1, s5
	; GFX11-GISEL-NEXT: s_or_b32 s8, s3, 0x1000
	; GFX11-GISEL-NEXT: s_max_i32 s6, s6, 0
	; GFX11-GISEL-NEXT: s_lshl_b32 s10, s5, 12
	; GFX11-GISEL-NEXT: s_min_i32 s6, s6, 13
	; GFX11-GISEL-NEXT: s_or_b32 s3, s3, s10
	; GFX11-GISEL-NEXT: s_lshr_b32 s9, s8, s6
	; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) \| instskip(NEXT) \| instid1(SALU_CYCLE_1)
	; GFX11-GISEL-NEXT: s_lshl_b32 s6, s9, s6
	; GFX11-GISEL-NEXT: s_cmp_lg_u32 s6, s8
	; GFX11-GISEL-NEXT: s_cselect_b32 s6, 1, 0
	; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) \| instskip(SKIP_2) \| instid1(SALU_CYCLE_1)
	; GFX11-GISEL-NEXT: s_or_b32 s6, s9, s6
	; GFX11-GISEL-NEXT: s_cmp_lt_i32 s5, 1
	; GFX11-GISEL-NEXT: s_cselect_b32 s3, s6, s3
	; GFX11-GISEL-NEXT: s_and_b32 s6, s3, 7
	; GFX11-GISEL-NEXT: s_lshr_b32 s3, s3, 2
	; GFX11-GISEL-NEXT: s_cmp_eq_u32 s6, 3
	; GFX11-GISEL-NEXT: s_cselect_b32 s8, 1, 0
	; GFX11-GISEL-NEXT: s_cmp_gt_i32 s6, 5
	; GFX11-GISEL-NEXT: s_cselect_b32 s6, 1, 0
	; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) \| instskip(NEXT) \| instid1(SALU_CYCLE_1)
	; GFX11-GISEL-NEXT: s_or_b32 s6, s8, s6
	; GFX11-GISEL-NEXT: s_and_b32 s6, s6, 1
	; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
	; GFX11-GISEL-NEXT: s_add_i32 s3, s3, s6
	; GFX11-GISEL-NEXT: s_cmp_gt_i32 s5, 30
	; GFX11-GISEL-NEXT: s_cselect_b32 s3, 0x7c00, s3
	; GFX11-GISEL-NEXT: s_cmpk_eq_i32 s5, 0x40f
	; GFX11-GISEL-NEXT: s_cselect_b32 s3, s4, s3
	; GFX11-GISEL-NEXT: s_lshr_b32 s4, s7, 16
	; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) \| instskip(NEXT) \| instid1(SALU_CYCLE_1)
	; GFX11-GISEL-NEXT: s_and_b32 s4, s4, 0x8000
	; GFX11-GISEL-NEXT: s_or_b32 s3, s4, s3
	; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) \| instskip(NEXT) \| instid1(SALU_CYCLE_1)
	; GFX11-GISEL-NEXT: s_pack_ll_b32_b16 s2, s2, s3
	; GFX11-GISEL-NEXT: v_mov_b32_e32 v0, s2
	; GFX11-GISEL-NEXT: global_store_b32 v1, v0, s[0:1]			; GFX11-GISEL-NEXT: global_store_b32 v1, v0, s[0:1]
	; GFX11-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)			; GFX11-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
	; GFX11-GISEL-NEXT: s_endpgm			; GFX11-GISEL-NEXT: s_endpgm
	ptr addrspace(1) %r,			ptr addrspace(1) %r,
	ptr addrspace(1) %a) {			ptr addrspace(1) %a) {
	entry:			entry:
	%a.val = load <2 x double>, ptr addrspace(1) %a			%a.val = load <2 x double>, ptr addrspace(1) %a
	%r.val = fptrunc <2 x double> %a.val to <2 x half>			%r.val = fptrunc <2 x double> %a.val to <2 x half>
	▲ Show 20 Lines • Show All 750 Lines • Show Last 20 Lines

llvm/test/CodeGen/AMDGPU/fptrunc.ll

	; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2			; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
	; RUN: llc -march=amdgcn -verify-machineinstrs < %s \| FileCheck -check-prefixes=SI %s			; RUN: llc -march=amdgcn -verify-machineinstrs < %s \| FileCheck -check-prefixes=SI %s
	; RUN: llc -march=amdgcn -mcpu=tonga -global-isel=0 -mattr=-flat-for-global -verify-machineinstrs < %s \| FileCheck -check-prefixes=VI-SDAG,VI-SAFE-SDAG %s			; RUN: llc -march=amdgcn -mcpu=tonga -global-isel=0 -mattr=-flat-for-global -verify-machineinstrs < %s \| FileCheck -check-prefixes=VI-SDAG,VI-SAFE-SDAG %s
	; RUN: llc -march=amdgcn -mcpu=tonga -global-isel=1 -mattr=-flat-for-global -verify-machineinstrs < %s \| FileCheck -check-prefixes=VI-GISEL %s			; RUN: llc -march=amdgcn -mcpu=tonga -global-isel=1 -mattr=-flat-for-global -verify-machineinstrs < %s \| FileCheck -check-prefixes=VI-GISEL,VI-SAFE-GISEL %s
	; RUN: llc -march=amdgcn -mcpu=tonga -global-isel=0 -mattr=-flat-for-global -enable-unsafe-fp-math -verify-machineinstrs < %s \| FileCheck -check-prefixes=VI-SDAG,VI-UNSAFE-SDAG %s			; RUN: llc -march=amdgcn -mcpu=tonga -global-isel=0 -mattr=-flat-for-global -enable-unsafe-fp-math -verify-machineinstrs < %s \| FileCheck -check-prefixes=VI-SDAG,VI-UNSAFE-SDAG %s
	; RUN: llc -march=amdgcn -mcpu=tonga -global-isel=1 -mattr=-flat-for-global -enable-unsafe-fp-math -verify-machineinstrs < %s \| FileCheck -check-prefixes=VI-GISEL %s			; RUN: llc -march=amdgcn -mcpu=tonga -global-isel=1 -mattr=-flat-for-global -enable-unsafe-fp-math -verify-machineinstrs < %s \| FileCheck -check-prefixes=VI-GISEL,VI-UNSAFE-GISEL %s
	; RUN: llc -march=amdgcn -mcpu=gfx1030 -global-isel=0 -mattr=-flat-for-global -verify-machineinstrs < %s \| FileCheck -check-prefixes=GFX10-SDAG,GFX10-SAFE-SDAG %s			; RUN: llc -march=amdgcn -mcpu=gfx1030 -global-isel=0 -mattr=-flat-for-global -verify-machineinstrs < %s \| FileCheck -check-prefixes=GFX10-SDAG,GFX10-SAFE-SDAG %s
	; RUN: llc -march=amdgcn -mcpu=gfx1030 -global-isel=1 -mattr=-flat-for-global -verify-machineinstrs < %s \| FileCheck -check-prefixes=GFX10-GISEL %s			; RUN: llc -march=amdgcn -mcpu=gfx1030 -global-isel=1 -mattr=-flat-for-global -verify-machineinstrs < %s \| FileCheck -check-prefixes=GFX10-GISEL,GFX10-SAFE-GISEL %s
	; RUN: llc -march=amdgcn -mcpu=gfx1030 -global-isel=0 -mattr=-flat-for-global -enable-unsafe-fp-math -verify-machineinstrs < %s \| FileCheck -check-prefixes=GFX10-SDAG,GFX10-UNSAFE-SDAG %s			; RUN: llc -march=amdgcn -mcpu=gfx1030 -global-isel=0 -mattr=-flat-for-global -enable-unsafe-fp-math -verify-machineinstrs < %s \| FileCheck -check-prefixes=GFX10-SDAG,GFX10-UNSAFE-SDAG %s
	; RUN: llc -march=amdgcn -mcpu=gfx1030 -global-isel=1 -mattr=-flat-for-global -enable-unsafe-fp-math -verify-machineinstrs < %s \| FileCheck -check-prefixes=GFX10-GISEL %s			; RUN: llc -march=amdgcn -mcpu=gfx1030 -global-isel=1 -mattr=-flat-for-global -enable-unsafe-fp-math -verify-machineinstrs < %s \| FileCheck -check-prefixes=GFX10-GISEL,GFX10-UNSAFE-GISEL %s
	; RUN: llc -march=amdgcn -mcpu=gfx1100 -global-isel=0 -mattr=-flat-for-global -verify-machineinstrs < %s \| FileCheck -check-prefixes=GFX11-SDAG,GFX11-SAFE-SDAG %s			; RUN: llc -march=amdgcn -mcpu=gfx1100 -global-isel=0 -mattr=-flat-for-global -verify-machineinstrs < %s \| FileCheck -check-prefixes=GFX11-SDAG,GFX11-SAFE-SDAG %s
	; RUN: llc -march=amdgcn -mcpu=gfx1100 -global-isel=1 -mattr=-flat-for-global -verify-machineinstrs < %s \| FileCheck -check-prefixes=GFX11-GISEL %s			; RUN: llc -march=amdgcn -mcpu=gfx1100 -global-isel=1 -mattr=-flat-for-global -verify-machineinstrs < %s \| FileCheck -check-prefixes=GFX11-GISEL,GFX11-SAFE-GISEL %s
	; RUN: llc -march=amdgcn -mcpu=gfx1100 -global-isel=0 -mattr=-flat-for-global -enable-unsafe-fp-math -verify-machineinstrs < %s \| FileCheck -check-prefixes=GFX11-SDAG,GFX11-UNSAFE-SDAG %s			; RUN: llc -march=amdgcn -mcpu=gfx1100 -global-isel=0 -mattr=-flat-for-global -enable-unsafe-fp-math -verify-machineinstrs < %s \| FileCheck -check-prefixes=GFX11-SDAG,GFX11-UNSAFE-SDAG %s
	; RUN: llc -march=amdgcn -mcpu=gfx1100 -global-isel=1 -mattr=-flat-for-global -enable-unsafe-fp-math -verify-machineinstrs < %s \| FileCheck -check-prefixes=GFX11-GISEL %s			; RUN: llc -march=amdgcn -mcpu=gfx1100 -global-isel=1 -mattr=-flat-for-global -enable-unsafe-fp-math -verify-machineinstrs < %s \| FileCheck -check-prefixes=GFX11-GISEL,GFX11-UNSAFE-GISEL %s

	define amdgpu_kernel void @fptrunc_f64_to_f32(ptr addrspace(1) %out, double %in) {			define amdgpu_kernel void @fptrunc_f64_to_f32(ptr addrspace(1) %out, double %in) {
	; SI-LABEL: fptrunc_f64_to_f32:			; SI-LABEL: fptrunc_f64_to_f32:
	; SI: ; %bb.0:			; SI: ; %bb.0:
	; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9			; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
	; SI-NEXT: s_mov_b32 s7, 0xf000			; SI-NEXT: s_mov_b32 s7, 0xf000
	; SI-NEXT: s_mov_b32 s6, -1			; SI-NEXT: s_mov_b32 s6, -1
	; SI-NEXT: s_waitcnt lgkmcnt(0)			; SI-NEXT: s_waitcnt lgkmcnt(0)
	▲ Show 20 Lines • Show All 186 Lines • ▼ Show 20 Lines
	; VI-SAFE-SDAG-NEXT: s_cselect_b64 vcc, -1, 0			; VI-SAFE-SDAG-NEXT: s_cselect_b64 vcc, -1, 0
	; VI-SAFE-SDAG-NEXT: s_lshr_b32 s4, s7, 16			; VI-SAFE-SDAG-NEXT: s_lshr_b32 s4, s7, 16
	; VI-SAFE-SDAG-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc			; VI-SAFE-SDAG-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
	; VI-SAFE-SDAG-NEXT: s_and_b32 s4, s4, 0x8000			; VI-SAFE-SDAG-NEXT: s_and_b32 s4, s4, 0x8000
	; VI-SAFE-SDAG-NEXT: v_or_b32_e32 v0, s4, v0			; VI-SAFE-SDAG-NEXT: v_or_b32_e32 v0, s4, v0
	; VI-SAFE-SDAG-NEXT: buffer_store_short v0, off, s[0:3], 0			; VI-SAFE-SDAG-NEXT: buffer_store_short v0, off, s[0:3], 0
	; VI-SAFE-SDAG-NEXT: s_endpgm			; VI-SAFE-SDAG-NEXT: s_endpgm
	;			;
	; VI-GISEL-LABEL: fptrunc_f64_to_f16:			; VI-SAFE-GISEL-LABEL: fptrunc_f64_to_f16:
	; VI-GISEL: ; %bb.0:			; VI-SAFE-GISEL: ; %bb.0:
	; VI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24			; VI-SAFE-GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
	; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0)			; VI-SAFE-GISEL-NEXT: s_waitcnt lgkmcnt(0)
	; VI-GISEL-NEXT: s_bfe_u32 s4, s3, 0xb0014			; VI-SAFE-GISEL-NEXT: s_bfe_u32 s4, s3, 0xb0014
	; VI-GISEL-NEXT: s_lshr_b32 s5, s3, 8			; VI-SAFE-GISEL-NEXT: s_lshr_b32 s5, s3, 8
	; VI-GISEL-NEXT: s_and_b32 s6, s3, 0x1ff			; VI-SAFE-GISEL-NEXT: s_and_b32 s6, s3, 0x1ff
	; VI-GISEL-NEXT: s_addk_i32 s4, 0xfc10			; VI-SAFE-GISEL-NEXT: s_addk_i32 s4, 0xfc10
	; VI-GISEL-NEXT: s_and_b32 s5, s5, 0xffe			; VI-SAFE-GISEL-NEXT: s_and_b32 s5, s5, 0xffe
	; VI-GISEL-NEXT: s_or_b32 s2, s6, s2			; VI-SAFE-GISEL-NEXT: s_or_b32 s2, s6, s2
	; VI-GISEL-NEXT: s_cmp_lg_u32 s2, 0			; VI-SAFE-GISEL-NEXT: s_cmp_lg_u32 s2, 0
	; VI-GISEL-NEXT: s_cselect_b32 s2, 1, 0			; VI-SAFE-GISEL-NEXT: s_cselect_b32 s2, 1, 0
	; VI-GISEL-NEXT: s_or_b32 s2, s5, s2			; VI-SAFE-GISEL-NEXT: s_or_b32 s2, s5, s2
	; VI-GISEL-NEXT: s_cmp_lg_u32 s2, 0			; VI-SAFE-GISEL-NEXT: s_cmp_lg_u32 s2, 0
	; VI-GISEL-NEXT: s_movk_i32 s5, 0x7e00			; VI-SAFE-GISEL-NEXT: s_movk_i32 s5, 0x7e00
	; VI-GISEL-NEXT: s_cselect_b32 s5, s5, 0x7c00			; VI-SAFE-GISEL-NEXT: s_cselect_b32 s5, s5, 0x7c00
	; VI-GISEL-NEXT: s_sub_i32 s7, 1, s4			; VI-SAFE-GISEL-NEXT: s_sub_i32 s7, 1, s4
	; VI-GISEL-NEXT: s_lshl_b32 s6, s4, 12			; VI-SAFE-GISEL-NEXT: s_lshl_b32 s6, s4, 12
	; VI-GISEL-NEXT: s_max_i32 s7, s7, 0			; VI-SAFE-GISEL-NEXT: s_max_i32 s7, s7, 0
	; VI-GISEL-NEXT: s_or_b32 s6, s2, s6			; VI-SAFE-GISEL-NEXT: s_or_b32 s6, s2, s6
	; VI-GISEL-NEXT: s_min_i32 s7, s7, 13			; VI-SAFE-GISEL-NEXT: s_min_i32 s7, s7, 13
	; VI-GISEL-NEXT: s_bitset1_b32 s2, 12			; VI-SAFE-GISEL-NEXT: s_bitset1_b32 s2, 12
	; VI-GISEL-NEXT: s_lshr_b32 s8, s2, s7			; VI-SAFE-GISEL-NEXT: s_lshr_b32 s8, s2, s7
	; VI-GISEL-NEXT: s_lshl_b32 s7, s8, s7			; VI-SAFE-GISEL-NEXT: s_lshl_b32 s7, s8, s7
	; VI-GISEL-NEXT: s_cmp_lg_u32 s7, s2			; VI-SAFE-GISEL-NEXT: s_cmp_lg_u32 s7, s2
	; VI-GISEL-NEXT: s_cselect_b32 s2, 1, 0			; VI-SAFE-GISEL-NEXT: s_cselect_b32 s2, 1, 0
	; VI-GISEL-NEXT: s_or_b32 s2, s8, s2			; VI-SAFE-GISEL-NEXT: s_or_b32 s2, s8, s2
	; VI-GISEL-NEXT: s_cmp_lt_i32 s4, 1			; VI-SAFE-GISEL-NEXT: s_cmp_lt_i32 s4, 1
	; VI-GISEL-NEXT: s_cselect_b32 s2, s2, s6			; VI-SAFE-GISEL-NEXT: s_cselect_b32 s2, s2, s6
	; VI-GISEL-NEXT: s_and_b32 s6, s2, 7			; VI-SAFE-GISEL-NEXT: s_and_b32 s6, s2, 7
	; VI-GISEL-NEXT: s_lshr_b32 s2, s2, 2			; VI-SAFE-GISEL-NEXT: s_lshr_b32 s2, s2, 2
	; VI-GISEL-NEXT: s_cmp_eq_u32 s6, 3			; VI-SAFE-GISEL-NEXT: s_cmp_eq_u32 s6, 3
	; VI-GISEL-NEXT: s_cselect_b32 s7, 1, 0			; VI-SAFE-GISEL-NEXT: s_cselect_b32 s7, 1, 0
	; VI-GISEL-NEXT: s_cmp_gt_i32 s6, 5			; VI-SAFE-GISEL-NEXT: s_cmp_gt_i32 s6, 5
	; VI-GISEL-NEXT: s_cselect_b32 s6, 1, 0			; VI-SAFE-GISEL-NEXT: s_cselect_b32 s6, 1, 0
	; VI-GISEL-NEXT: s_or_b32 s6, s7, s6			; VI-SAFE-GISEL-NEXT: s_or_b32 s6, s7, s6
	; VI-GISEL-NEXT: s_and_b32 s6, s6, 1			; VI-SAFE-GISEL-NEXT: s_and_b32 s6, s6, 1
	; VI-GISEL-NEXT: s_add_i32 s2, s2, s6			; VI-SAFE-GISEL-NEXT: s_add_i32 s2, s2, s6
	; VI-GISEL-NEXT: s_cmp_gt_i32 s4, 30			; VI-SAFE-GISEL-NEXT: s_cmp_gt_i32 s4, 30
	; VI-GISEL-NEXT: s_cselect_b32 s2, 0x7c00, s2			; VI-SAFE-GISEL-NEXT: s_cselect_b32 s2, 0x7c00, s2
	; VI-GISEL-NEXT: s_cmpk_eq_i32 s4, 0x40f			; VI-SAFE-GISEL-NEXT: s_cmpk_eq_i32 s4, 0x40f
	; VI-GISEL-NEXT: s_cselect_b32 s2, s5, s2			; VI-SAFE-GISEL-NEXT: s_cselect_b32 s2, s5, s2
	; VI-GISEL-NEXT: s_lshr_b32 s3, s3, 16			; VI-SAFE-GISEL-NEXT: s_lshr_b32 s3, s3, 16
	; VI-GISEL-NEXT: s_and_b32 s3, s3, 0x8000			; VI-SAFE-GISEL-NEXT: s_and_b32 s3, s3, 0x8000
	; VI-GISEL-NEXT: s_or_b32 s2, s3, s2			; VI-SAFE-GISEL-NEXT: s_or_b32 s2, s3, s2
	; VI-GISEL-NEXT: v_mov_b32_e32 v0, s2			; VI-SAFE-GISEL-NEXT: v_mov_b32_e32 v0, s2
	; VI-GISEL-NEXT: s_mov_b32 s2, -1			; VI-SAFE-GISEL-NEXT: s_mov_b32 s2, -1
	; VI-GISEL-NEXT: s_mov_b32 s3, 0xf000			; VI-SAFE-GISEL-NEXT: s_mov_b32 s3, 0xf000
	; VI-GISEL-NEXT: buffer_store_short v0, off, s[0:3], 0			; VI-SAFE-GISEL-NEXT: buffer_store_short v0, off, s[0:3], 0
	; VI-GISEL-NEXT: s_endpgm			; VI-SAFE-GISEL-NEXT: s_endpgm
	;			;
	; VI-UNSAFE-SDAG-LABEL: fptrunc_f64_to_f16:			; VI-UNSAFE-SDAG-LABEL: fptrunc_f64_to_f16:
	; VI-UNSAFE-SDAG: ; %bb.0:			; VI-UNSAFE-SDAG: ; %bb.0:
	; VI-UNSAFE-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24			; VI-UNSAFE-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
	; VI-UNSAFE-SDAG-NEXT: s_waitcnt lgkmcnt(0)			; VI-UNSAFE-SDAG-NEXT: s_waitcnt lgkmcnt(0)
	; VI-UNSAFE-SDAG-NEXT: v_cvt_f32_f64_e32 v0, s[2:3]			; VI-UNSAFE-SDAG-NEXT: v_cvt_f32_f64_e32 v0, s[2:3]
	; VI-UNSAFE-SDAG-NEXT: s_mov_b32 s3, 0xf000			; VI-UNSAFE-SDAG-NEXT: s_mov_b32 s3, 0xf000
	; VI-UNSAFE-SDAG-NEXT: s_mov_b32 s2, -1			; VI-UNSAFE-SDAG-NEXT: s_mov_b32 s2, -1
	; VI-UNSAFE-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0			; VI-UNSAFE-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
	; VI-UNSAFE-SDAG-NEXT: buffer_store_short v0, off, s[0:3], 0			; VI-UNSAFE-SDAG-NEXT: buffer_store_short v0, off, s[0:3], 0
	; VI-UNSAFE-SDAG-NEXT: s_endpgm			; VI-UNSAFE-SDAG-NEXT: s_endpgm
	;			;
				; VI-UNSAFE-GISEL-LABEL: fptrunc_f64_to_f16:
				; VI-UNSAFE-GISEL: ; %bb.0:
				; VI-UNSAFE-GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
				; VI-UNSAFE-GISEL-NEXT: s_waitcnt lgkmcnt(0)
				; VI-UNSAFE-GISEL-NEXT: v_cvt_f32_f64_e32 v0, s[2:3]
				; VI-UNSAFE-GISEL-NEXT: s_mov_b32 s2, -1
				; VI-UNSAFE-GISEL-NEXT: s_mov_b32 s3, 0xf000
				; VI-UNSAFE-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
				; VI-UNSAFE-GISEL-NEXT: buffer_store_short v0, off, s[0:3], 0
				; VI-UNSAFE-GISEL-NEXT: s_endpgm
				;
	; GFX10-SAFE-SDAG-LABEL: fptrunc_f64_to_f16:			; GFX10-SAFE-SDAG-LABEL: fptrunc_f64_to_f16:
	; GFX10-SAFE-SDAG: ; %bb.0:			; GFX10-SAFE-SDAG: ; %bb.0:
	; GFX10-SAFE-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24			; GFX10-SAFE-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
	; GFX10-SAFE-SDAG-NEXT: s_waitcnt lgkmcnt(0)			; GFX10-SAFE-SDAG-NEXT: s_waitcnt lgkmcnt(0)
	; GFX10-SAFE-SDAG-NEXT: s_and_b32 s4, s3, 0x1ff			; GFX10-SAFE-SDAG-NEXT: s_and_b32 s4, s3, 0x1ff
	; GFX10-SAFE-SDAG-NEXT: s_lshr_b32 s5, s3, 8			; GFX10-SAFE-SDAG-NEXT: s_lshr_b32 s5, s3, 8
	; GFX10-SAFE-SDAG-NEXT: s_or_b32 s2, s4, s2			; GFX10-SAFE-SDAG-NEXT: s_or_b32 s2, s4, s2
	; GFX10-SAFE-SDAG-NEXT: s_and_b32 s4, s5, 0xffe			; GFX10-SAFE-SDAG-NEXT: s_and_b32 s4, s5, 0xffe
	Show All 40 Lines
	; GFX10-SAFE-SDAG-NEXT: v_lshlrev_b32_e32 v0, 9, v0			; GFX10-SAFE-SDAG-NEXT: v_lshlrev_b32_e32 v0, 9, v0
	; GFX10-SAFE-SDAG-NEXT: v_or_b32_e32 v0, 0x7c00, v0			; GFX10-SAFE-SDAG-NEXT: v_or_b32_e32 v0, 0x7c00, v0
	; GFX10-SAFE-SDAG-NEXT: v_cndmask_b32_e32 v0, s5, v0, vcc_lo			; GFX10-SAFE-SDAG-NEXT: v_cndmask_b32_e32 v0, s5, v0, vcc_lo
	; GFX10-SAFE-SDAG-NEXT: v_or_b32_e32 v0, s2, v0			; GFX10-SAFE-SDAG-NEXT: v_or_b32_e32 v0, s2, v0
	; GFX10-SAFE-SDAG-NEXT: s_mov_b32 s2, -1			; GFX10-SAFE-SDAG-NEXT: s_mov_b32 s2, -1
	; GFX10-SAFE-SDAG-NEXT: buffer_store_short v0, off, s[0:3], 0			; GFX10-SAFE-SDAG-NEXT: buffer_store_short v0, off, s[0:3], 0
	; GFX10-SAFE-SDAG-NEXT: s_endpgm			; GFX10-SAFE-SDAG-NEXT: s_endpgm
	;			;
	; GFX10-GISEL-LABEL: fptrunc_f64_to_f16:			; GFX10-SAFE-GISEL-LABEL: fptrunc_f64_to_f16:
	; GFX10-GISEL: ; %bb.0:			; GFX10-SAFE-GISEL: ; %bb.0:
	; GFX10-GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24			; GFX10-SAFE-GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
	; GFX10-GISEL-NEXT: s_waitcnt lgkmcnt(0)			; GFX10-SAFE-GISEL-NEXT: s_waitcnt lgkmcnt(0)
	; GFX10-GISEL-NEXT: s_and_b32 s6, s3, 0x1ff			; GFX10-SAFE-GISEL-NEXT: s_and_b32 s6, s3, 0x1ff
	; GFX10-GISEL-NEXT: s_bfe_u32 s4, s3, 0xb0014			; GFX10-SAFE-GISEL-NEXT: s_bfe_u32 s4, s3, 0xb0014
	; GFX10-GISEL-NEXT: s_lshr_b32 s5, s3, 8			; GFX10-SAFE-GISEL-NEXT: s_lshr_b32 s5, s3, 8
	; GFX10-GISEL-NEXT: s_or_b32 s2, s6, s2			; GFX10-SAFE-GISEL-NEXT: s_or_b32 s2, s6, s2
	; GFX10-GISEL-NEXT: s_addk_i32 s4, 0xfc10			; GFX10-SAFE-GISEL-NEXT: s_addk_i32 s4, 0xfc10
	; GFX10-GISEL-NEXT: s_and_b32 s5, s5, 0xffe			; GFX10-SAFE-GISEL-NEXT: s_and_b32 s5, s5, 0xffe
	; GFX10-GISEL-NEXT: s_cmp_lg_u32 s2, 0			; GFX10-SAFE-GISEL-NEXT: s_cmp_lg_u32 s2, 0
	; GFX10-GISEL-NEXT: s_cselect_b32 s2, 1, 0			; GFX10-SAFE-GISEL-NEXT: s_cselect_b32 s2, 1, 0
	; GFX10-GISEL-NEXT: s_or_b32 s2, s5, s2			; GFX10-SAFE-GISEL-NEXT: s_or_b32 s2, s5, s2
	; GFX10-GISEL-NEXT: s_movk_i32 s5, 0x7e00			; GFX10-SAFE-GISEL-NEXT: s_movk_i32 s5, 0x7e00
	; GFX10-GISEL-NEXT: s_cmp_lg_u32 s2, 0			; GFX10-SAFE-GISEL-NEXT: s_cmp_lg_u32 s2, 0
	; GFX10-GISEL-NEXT: s_cselect_b32 s5, s5, 0x7c00			; GFX10-SAFE-GISEL-NEXT: s_cselect_b32 s5, s5, 0x7c00
	; GFX10-GISEL-NEXT: s_sub_i32 s6, 1, s4			; GFX10-SAFE-GISEL-NEXT: s_sub_i32 s6, 1, s4
	; GFX10-GISEL-NEXT: s_or_b32 s7, s2, 0x1000			; GFX10-SAFE-GISEL-NEXT: s_or_b32 s7, s2, 0x1000
	; GFX10-GISEL-NEXT: s_max_i32 s6, s6, 0			; GFX10-SAFE-GISEL-NEXT: s_max_i32 s6, s6, 0
	; GFX10-GISEL-NEXT: s_lshl_b32 s9, s4, 12			; GFX10-SAFE-GISEL-NEXT: s_lshl_b32 s9, s4, 12
	; GFX10-GISEL-NEXT: s_min_i32 s6, s6, 13			; GFX10-SAFE-GISEL-NEXT: s_min_i32 s6, s6, 13
	; GFX10-GISEL-NEXT: s_or_b32 s2, s2, s9			; GFX10-SAFE-GISEL-NEXT: s_or_b32 s2, s2, s9
	; GFX10-GISEL-NEXT: s_lshr_b32 s8, s7, s6			; GFX10-SAFE-GISEL-NEXT: s_lshr_b32 s8, s7, s6
	; GFX10-GISEL-NEXT: s_lshl_b32 s6, s8, s6			; GFX10-SAFE-GISEL-NEXT: s_lshl_b32 s6, s8, s6
	; GFX10-GISEL-NEXT: s_cmp_lg_u32 s6, s7			; GFX10-SAFE-GISEL-NEXT: s_cmp_lg_u32 s6, s7
	; GFX10-GISEL-NEXT: s_cselect_b32 s6, 1, 0			; GFX10-SAFE-GISEL-NEXT: s_cselect_b32 s6, 1, 0
	; GFX10-GISEL-NEXT: s_or_b32 s6, s8, s6			; GFX10-SAFE-GISEL-NEXT: s_or_b32 s6, s8, s6
	; GFX10-GISEL-NEXT: s_cmp_lt_i32 s4, 1			; GFX10-SAFE-GISEL-NEXT: s_cmp_lt_i32 s4, 1
	; GFX10-GISEL-NEXT: s_cselect_b32 s2, s6, s2			; GFX10-SAFE-GISEL-NEXT: s_cselect_b32 s2, s6, s2
	; GFX10-GISEL-NEXT: s_and_b32 s6, s2, 7			; GFX10-SAFE-GISEL-NEXT: s_and_b32 s6, s2, 7
	; GFX10-GISEL-NEXT: s_lshr_b32 s2, s2, 2			; GFX10-SAFE-GISEL-NEXT: s_lshr_b32 s2, s2, 2
	; GFX10-GISEL-NEXT: s_cmp_eq_u32 s6, 3			; GFX10-SAFE-GISEL-NEXT: s_cmp_eq_u32 s6, 3
	; GFX10-GISEL-NEXT: s_cselect_b32 s7, 1, 0			; GFX10-SAFE-GISEL-NEXT: s_cselect_b32 s7, 1, 0
	; GFX10-GISEL-NEXT: s_cmp_gt_i32 s6, 5			; GFX10-SAFE-GISEL-NEXT: s_cmp_gt_i32 s6, 5
	; GFX10-GISEL-NEXT: s_cselect_b32 s6, 1, 0			; GFX10-SAFE-GISEL-NEXT: s_cselect_b32 s6, 1, 0
	; GFX10-GISEL-NEXT: s_or_b32 s6, s7, s6			; GFX10-SAFE-GISEL-NEXT: s_or_b32 s6, s7, s6
	; GFX10-GISEL-NEXT: s_and_b32 s6, s6, 1			; GFX10-SAFE-GISEL-NEXT: s_and_b32 s6, s6, 1
	; GFX10-GISEL-NEXT: s_add_i32 s2, s2, s6			; GFX10-SAFE-GISEL-NEXT: s_add_i32 s2, s2, s6
	; GFX10-GISEL-NEXT: s_cmp_gt_i32 s4, 30			; GFX10-SAFE-GISEL-NEXT: s_cmp_gt_i32 s4, 30
	; GFX10-GISEL-NEXT: s_cselect_b32 s2, 0x7c00, s2			; GFX10-SAFE-GISEL-NEXT: s_cselect_b32 s2, 0x7c00, s2
	; GFX10-GISEL-NEXT: s_cmpk_eq_i32 s4, 0x40f			; GFX10-SAFE-GISEL-NEXT: s_cmpk_eq_i32 s4, 0x40f
	; GFX10-GISEL-NEXT: s_cselect_b32 s2, s5, s2			; GFX10-SAFE-GISEL-NEXT: s_cselect_b32 s2, s5, s2
	; GFX10-GISEL-NEXT: s_lshr_b32 s3, s3, 16			; GFX10-SAFE-GISEL-NEXT: s_lshr_b32 s3, s3, 16
	; GFX10-GISEL-NEXT: s_and_b32 s3, s3, 0x8000			; GFX10-SAFE-GISEL-NEXT: s_and_b32 s3, s3, 0x8000
	; GFX10-GISEL-NEXT: s_or_b32 s2, s3, s2			; GFX10-SAFE-GISEL-NEXT: s_or_b32 s2, s3, s2
	; GFX10-GISEL-NEXT: s_mov_b32 s3, 0x31016000			; GFX10-SAFE-GISEL-NEXT: s_mov_b32 s3, 0x31016000
	; GFX10-GISEL-NEXT: v_mov_b32_e32 v0, s2			; GFX10-SAFE-GISEL-NEXT: v_mov_b32_e32 v0, s2
	; GFX10-GISEL-NEXT: s_mov_b32 s2, -1			; GFX10-SAFE-GISEL-NEXT: s_mov_b32 s2, -1
	; GFX10-GISEL-NEXT: buffer_store_short v0, off, s[0:3], 0			; GFX10-SAFE-GISEL-NEXT: buffer_store_short v0, off, s[0:3], 0
	; GFX10-GISEL-NEXT: s_endpgm			; GFX10-SAFE-GISEL-NEXT: s_endpgm
	;			;
	; GFX10-UNSAFE-SDAG-LABEL: fptrunc_f64_to_f16:			; GFX10-UNSAFE-SDAG-LABEL: fptrunc_f64_to_f16:
	; GFX10-UNSAFE-SDAG: ; %bb.0:			; GFX10-UNSAFE-SDAG: ; %bb.0:
	; GFX10-UNSAFE-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24			; GFX10-UNSAFE-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
	; GFX10-UNSAFE-SDAG-NEXT: s_waitcnt lgkmcnt(0)			; GFX10-UNSAFE-SDAG-NEXT: s_waitcnt lgkmcnt(0)
	; GFX10-UNSAFE-SDAG-NEXT: v_cvt_f32_f64_e32 v0, s[2:3]			; GFX10-UNSAFE-SDAG-NEXT: v_cvt_f32_f64_e32 v0, s[2:3]
	; GFX10-UNSAFE-SDAG-NEXT: s_mov_b32 s3, 0x31016000			; GFX10-UNSAFE-SDAG-NEXT: s_mov_b32 s3, 0x31016000
	; GFX10-UNSAFE-SDAG-NEXT: s_mov_b32 s2, -1			; GFX10-UNSAFE-SDAG-NEXT: s_mov_b32 s2, -1
	; GFX10-UNSAFE-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0			; GFX10-UNSAFE-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
	; GFX10-UNSAFE-SDAG-NEXT: buffer_store_short v0, off, s[0:3], 0			; GFX10-UNSAFE-SDAG-NEXT: buffer_store_short v0, off, s[0:3], 0
	; GFX10-UNSAFE-SDAG-NEXT: s_endpgm			; GFX10-UNSAFE-SDAG-NEXT: s_endpgm
	;			;
				; GFX10-UNSAFE-GISEL-LABEL: fptrunc_f64_to_f16:
				; GFX10-UNSAFE-GISEL: ; %bb.0:
				; GFX10-UNSAFE-GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
				; GFX10-UNSAFE-GISEL-NEXT: s_waitcnt lgkmcnt(0)
				; GFX10-UNSAFE-GISEL-NEXT: v_cvt_f32_f64_e32 v0, s[2:3]
				; GFX10-UNSAFE-GISEL-NEXT: s_mov_b32 s2, -1
				; GFX10-UNSAFE-GISEL-NEXT: s_mov_b32 s3, 0x31016000
				; GFX10-UNSAFE-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
				; GFX10-UNSAFE-GISEL-NEXT: buffer_store_short v0, off, s[0:3], 0
				; GFX10-UNSAFE-GISEL-NEXT: s_endpgm
				;
	; GFX11-SAFE-SDAG-LABEL: fptrunc_f64_to_f16:			; GFX11-SAFE-SDAG-LABEL: fptrunc_f64_to_f16:
	; GFX11-SAFE-SDAG: ; %bb.0:			; GFX11-SAFE-SDAG: ; %bb.0:
	; GFX11-SAFE-SDAG-NEXT: s_load_b128 s[0:3], s[0:1], 0x24			; GFX11-SAFE-SDAG-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
	; GFX11-SAFE-SDAG-NEXT: s_waitcnt lgkmcnt(0)			; GFX11-SAFE-SDAG-NEXT: s_waitcnt lgkmcnt(0)
	; GFX11-SAFE-SDAG-NEXT: s_and_b32 s4, s3, 0x1ff			; GFX11-SAFE-SDAG-NEXT: s_and_b32 s4, s3, 0x1ff
	; GFX11-SAFE-SDAG-NEXT: s_lshr_b32 s5, s3, 8			; GFX11-SAFE-SDAG-NEXT: s_lshr_b32 s5, s3, 8
	; GFX11-SAFE-SDAG-NEXT: s_or_b32 s2, s4, s2			; GFX11-SAFE-SDAG-NEXT: s_or_b32 s2, s4, s2
	; GFX11-SAFE-SDAG-NEXT: s_and_b32 s4, s5, 0xffe			; GFX11-SAFE-SDAG-NEXT: s_and_b32 s4, s5, 0xffe
	▲ Show 20 Lines • Show All 50 Lines • ▼ Show 20 Lines
	; GFX11-SAFE-SDAG-NEXT: v_cndmask_b32_e32 v0, s5, v0, vcc_lo			; GFX11-SAFE-SDAG-NEXT: v_cndmask_b32_e32 v0, s5, v0, vcc_lo
	; GFX11-SAFE-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)			; GFX11-SAFE-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
	; GFX11-SAFE-SDAG-NEXT: v_or_b32_e32 v0, s2, v0			; GFX11-SAFE-SDAG-NEXT: v_or_b32_e32 v0, s2, v0
	; GFX11-SAFE-SDAG-NEXT: s_mov_b32 s2, -1			; GFX11-SAFE-SDAG-NEXT: s_mov_b32 s2, -1
	; GFX11-SAFE-SDAG-NEXT: buffer_store_b16 v0, off, s[0:3], 0			; GFX11-SAFE-SDAG-NEXT: buffer_store_b16 v0, off, s[0:3], 0
	; GFX11-SAFE-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)			; GFX11-SAFE-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
	; GFX11-SAFE-SDAG-NEXT: s_endpgm			; GFX11-SAFE-SDAG-NEXT: s_endpgm
	;			;
	; GFX11-GISEL-LABEL: fptrunc_f64_to_f16:			; GFX11-SAFE-GISEL-LABEL: fptrunc_f64_to_f16:
	; GFX11-GISEL: ; %bb.0:			; GFX11-SAFE-GISEL: ; %bb.0:
	; GFX11-GISEL-NEXT: s_load_b128 s[0:3], s[0:1], 0x24			; GFX11-SAFE-GISEL-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
	; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)			; GFX11-SAFE-GISEL-NEXT: s_waitcnt lgkmcnt(0)
	; GFX11-GISEL-NEXT: s_and_b32 s6, s3, 0x1ff			; GFX11-SAFE-GISEL-NEXT: s_and_b32 s6, s3, 0x1ff
	; GFX11-GISEL-NEXT: s_bfe_u32 s4, s3, 0xb0014			; GFX11-SAFE-GISEL-NEXT: s_bfe_u32 s4, s3, 0xb0014
	; GFX11-GISEL-NEXT: s_lshr_b32 s5, s3, 8			; GFX11-SAFE-GISEL-NEXT: s_lshr_b32 s5, s3, 8
	; GFX11-GISEL-NEXT: s_or_b32 s2, s6, s2			; GFX11-SAFE-GISEL-NEXT: s_or_b32 s2, s6, s2
	; GFX11-GISEL-NEXT: s_addk_i32 s4, 0xfc10			; GFX11-SAFE-GISEL-NEXT: s_addk_i32 s4, 0xfc10
	; GFX11-GISEL-NEXT: s_and_b32 s5, s5, 0xffe			; GFX11-SAFE-GISEL-NEXT: s_and_b32 s5, s5, 0xffe
	; GFX11-GISEL-NEXT: s_cmp_lg_u32 s2, 0			; GFX11-SAFE-GISEL-NEXT: s_cmp_lg_u32 s2, 0
	; GFX11-GISEL-NEXT: s_cselect_b32 s2, 1, 0			; GFX11-SAFE-GISEL-NEXT: s_cselect_b32 s2, 1, 0
	; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)			; GFX11-SAFE-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
	; GFX11-GISEL-NEXT: s_or_b32 s2, s5, s2			; GFX11-SAFE-GISEL-NEXT: s_or_b32 s2, s5, s2
	; GFX11-GISEL-NEXT: s_movk_i32 s5, 0x7e00			; GFX11-SAFE-GISEL-NEXT: s_movk_i32 s5, 0x7e00
	; GFX11-GISEL-NEXT: s_cmp_lg_u32 s2, 0			; GFX11-SAFE-GISEL-NEXT: s_cmp_lg_u32 s2, 0
	; GFX11-GISEL-NEXT: s_cselect_b32 s5, s5, 0x7c00			; GFX11-SAFE-GISEL-NEXT: s_cselect_b32 s5, s5, 0x7c00
	; GFX11-GISEL-NEXT: s_sub_i32 s6, 1, s4			; GFX11-SAFE-GISEL-NEXT: s_sub_i32 s6, 1, s4
	; GFX11-GISEL-NEXT: s_or_b32 s7, s2, 0x1000			; GFX11-SAFE-GISEL-NEXT: s_or_b32 s7, s2, 0x1000
	; GFX11-GISEL-NEXT: s_max_i32 s6, s6, 0			; GFX11-SAFE-GISEL-NEXT: s_max_i32 s6, s6, 0
	; GFX11-GISEL-NEXT: s_lshl_b32 s9, s4, 12			; GFX11-SAFE-GISEL-NEXT: s_lshl_b32 s9, s4, 12
	; GFX11-GISEL-NEXT: s_min_i32 s6, s6, 13			; GFX11-SAFE-GISEL-NEXT: s_min_i32 s6, s6, 13
	; GFX11-GISEL-NEXT: s_or_b32 s2, s2, s9			; GFX11-SAFE-GISEL-NEXT: s_or_b32 s2, s2, s9
	; GFX11-GISEL-NEXT: s_lshr_b32 s8, s7, s6			; GFX11-SAFE-GISEL-NEXT: s_lshr_b32 s8, s7, s6
	; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) \| instskip(NEXT) \| instid1(SALU_CYCLE_1)			; GFX11-SAFE-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) \| instskip(NEXT) \| instid1(SALU_CYCLE_1)
	; GFX11-GISEL-NEXT: s_lshl_b32 s6, s8, s6			; GFX11-SAFE-GISEL-NEXT: s_lshl_b32 s6, s8, s6
	; GFX11-GISEL-NEXT: s_cmp_lg_u32 s6, s7			; GFX11-SAFE-GISEL-NEXT: s_cmp_lg_u32 s6, s7
	; GFX11-GISEL-NEXT: s_cselect_b32 s6, 1, 0			; GFX11-SAFE-GISEL-NEXT: s_cselect_b32 s6, 1, 0
	; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) \| instskip(SKIP_2) \| instid1(SALU_CYCLE_1)			; GFX11-SAFE-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) \| instskip(SKIP_2) \| instid1(SALU_CYCLE_1)
	; GFX11-GISEL-NEXT: s_or_b32 s6, s8, s6			; GFX11-SAFE-GISEL-NEXT: s_or_b32 s6, s8, s6
	; GFX11-GISEL-NEXT: s_cmp_lt_i32 s4, 1			; GFX11-SAFE-GISEL-NEXT: s_cmp_lt_i32 s4, 1
	; GFX11-GISEL-NEXT: s_cselect_b32 s2, s6, s2			; GFX11-SAFE-GISEL-NEXT: s_cselect_b32 s2, s6, s2
	; GFX11-GISEL-NEXT: s_and_b32 s6, s2, 7			; GFX11-SAFE-GISEL-NEXT: s_and_b32 s6, s2, 7
	; GFX11-GISEL-NEXT: s_lshr_b32 s2, s2, 2			; GFX11-SAFE-GISEL-NEXT: s_lshr_b32 s2, s2, 2
	; GFX11-GISEL-NEXT: s_cmp_eq_u32 s6, 3			; GFX11-SAFE-GISEL-NEXT: s_cmp_eq_u32 s6, 3
	; GFX11-GISEL-NEXT: s_cselect_b32 s7, 1, 0			; GFX11-SAFE-GISEL-NEXT: s_cselect_b32 s7, 1, 0
	; GFX11-GISEL-NEXT: s_cmp_gt_i32 s6, 5			; GFX11-SAFE-GISEL-NEXT: s_cmp_gt_i32 s6, 5
	; GFX11-GISEL-NEXT: s_cselect_b32 s6, 1, 0			; GFX11-SAFE-GISEL-NEXT: s_cselect_b32 s6, 1, 0
	; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) \| instskip(NEXT) \| instid1(SALU_CYCLE_1)			; GFX11-SAFE-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) \| instskip(NEXT) \| instid1(SALU_CYCLE_1)
	; GFX11-GISEL-NEXT: s_or_b32 s6, s7, s6			; GFX11-SAFE-GISEL-NEXT: s_or_b32 s6, s7, s6
	; GFX11-GISEL-NEXT: s_and_b32 s6, s6, 1			; GFX11-SAFE-GISEL-NEXT: s_and_b32 s6, s6, 1
	; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)			; GFX11-SAFE-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
	; GFX11-GISEL-NEXT: s_add_i32 s2, s2, s6			; GFX11-SAFE-GISEL-NEXT: s_add_i32 s2, s2, s6
	; GFX11-GISEL-NEXT: s_cmp_gt_i32 s4, 30			; GFX11-SAFE-GISEL-NEXT: s_cmp_gt_i32 s4, 30
	; GFX11-GISEL-NEXT: s_cselect_b32 s2, 0x7c00, s2			; GFX11-SAFE-GISEL-NEXT: s_cselect_b32 s2, 0x7c00, s2
	; GFX11-GISEL-NEXT: s_cmpk_eq_i32 s4, 0x40f			; GFX11-SAFE-GISEL-NEXT: s_cmpk_eq_i32 s4, 0x40f
	; GFX11-GISEL-NEXT: s_cselect_b32 s2, s5, s2			; GFX11-SAFE-GISEL-NEXT: s_cselect_b32 s2, s5, s2
	; GFX11-GISEL-NEXT: s_lshr_b32 s3, s3, 16			; GFX11-SAFE-GISEL-NEXT: s_lshr_b32 s3, s3, 16
	; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) \| instskip(NEXT) \| instid1(SALU_CYCLE_1)			; GFX11-SAFE-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) \| instskip(NEXT) \| instid1(SALU_CYCLE_1)
	; GFX11-GISEL-NEXT: s_and_b32 s3, s3, 0x8000			; GFX11-SAFE-GISEL-NEXT: s_and_b32 s3, s3, 0x8000
	; GFX11-GISEL-NEXT: s_or_b32 s2, s3, s2			; GFX11-SAFE-GISEL-NEXT: s_or_b32 s2, s3, s2
	; GFX11-GISEL-NEXT: s_mov_b32 s3, 0x31016000			; GFX11-SAFE-GISEL-NEXT: s_mov_b32 s3, 0x31016000
	; GFX11-GISEL-NEXT: v_mov_b32_e32 v0, s2			; GFX11-SAFE-GISEL-NEXT: v_mov_b32_e32 v0, s2
	; GFX11-GISEL-NEXT: s_mov_b32 s2, -1			; GFX11-SAFE-GISEL-NEXT: s_mov_b32 s2, -1
	; GFX11-GISEL-NEXT: buffer_store_b16 v0, off, s[0:3], 0			; GFX11-SAFE-GISEL-NEXT: buffer_store_b16 v0, off, s[0:3], 0
	; GFX11-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)			; GFX11-SAFE-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
	; GFX11-GISEL-NEXT: s_endpgm			; GFX11-SAFE-GISEL-NEXT: s_endpgm
	;			;
	; GFX11-UNSAFE-SDAG-LABEL: fptrunc_f64_to_f16:			; GFX11-UNSAFE-SDAG-LABEL: fptrunc_f64_to_f16:
	; GFX11-UNSAFE-SDAG: ; %bb.0:			; GFX11-UNSAFE-SDAG: ; %bb.0:
	; GFX11-UNSAFE-SDAG-NEXT: s_load_b128 s[0:3], s[0:1], 0x24			; GFX11-UNSAFE-SDAG-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
	; GFX11-UNSAFE-SDAG-NEXT: s_waitcnt lgkmcnt(0)			; GFX11-UNSAFE-SDAG-NEXT: s_waitcnt lgkmcnt(0)
	; GFX11-UNSAFE-SDAG-NEXT: v_cvt_f32_f64_e32 v0, s[2:3]			; GFX11-UNSAFE-SDAG-NEXT: v_cvt_f32_f64_e32 v0, s[2:3]
	; GFX11-UNSAFE-SDAG-NEXT: s_mov_b32 s3, 0x31016000			; GFX11-UNSAFE-SDAG-NEXT: s_mov_b32 s3, 0x31016000
	; GFX11-UNSAFE-SDAG-NEXT: s_mov_b32 s2, -1			; GFX11-UNSAFE-SDAG-NEXT: s_mov_b32 s2, -1
	; GFX11-UNSAFE-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)			; GFX11-UNSAFE-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
	; GFX11-UNSAFE-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0			; GFX11-UNSAFE-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
	; GFX11-UNSAFE-SDAG-NEXT: buffer_store_b16 v0, off, s[0:3], 0			; GFX11-UNSAFE-SDAG-NEXT: buffer_store_b16 v0, off, s[0:3], 0
	; GFX11-UNSAFE-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)			; GFX11-UNSAFE-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
	; GFX11-UNSAFE-SDAG-NEXT: s_endpgm			; GFX11-UNSAFE-SDAG-NEXT: s_endpgm
				;
				; GFX11-UNSAFE-GISEL-LABEL: fptrunc_f64_to_f16:
				; GFX11-UNSAFE-GISEL: ; %bb.0:
				; GFX11-UNSAFE-GISEL-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
				; GFX11-UNSAFE-GISEL-NEXT: s_waitcnt lgkmcnt(0)
				; GFX11-UNSAFE-GISEL-NEXT: v_cvt_f32_f64_e32 v0, s[2:3]
				; GFX11-UNSAFE-GISEL-NEXT: s_mov_b32 s2, -1
				; GFX11-UNSAFE-GISEL-NEXT: s_mov_b32 s3, 0x31016000
				; GFX11-UNSAFE-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
				; GFX11-UNSAFE-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
				; GFX11-UNSAFE-GISEL-NEXT: buffer_store_b16 v0, off, s[0:3], 0
				; GFX11-UNSAFE-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
				; GFX11-UNSAFE-GISEL-NEXT: s_endpgm
	%result = fptrunc double %in to half			%result = fptrunc double %in to half
	%result_i16 = bitcast half %result to i16			%result_i16 = bitcast half %result to i16
	store i16 %result_i16, ptr addrspace(1) %out			store i16 %result_i16, ptr addrspace(1) %out
	ret void			ret void
	}			}

	define amdgpu_kernel void @fptrunc_v2f64_to_v2f32(ptr addrspace(1) %out, <2 x double> %in) {			define amdgpu_kernel void @fptrunc_v2f64_to_v2f32(ptr addrspace(1) %out, <2 x double> %in) {
	; SI-LABEL: fptrunc_v2f64_to_v2f32:			; SI-LABEL: fptrunc_v2f64_to_v2f32:
	▲ Show 20 Lines • Show All 454 Lines • Show Last 20 Lines

This is an archive of the discontinued LLVM Phabricator instance.

[Codegen] Generate fast fp64-to-fp16 conversions in unsafe mode.
ClosedPublic

Details

Diff Detail

Unit TestsFailed

Event Timeline

Revision Contents

Diff 537417

llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h

llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp

llvm/lib/Target/AMDGPU/VOP1Instructions.td

llvm/test/CodeGen/AMDGPU/fptrunc.f16.ll

llvm/test/CodeGen/AMDGPU/fptrunc.ll

This is an archive of the discontinued LLVM Phabricator instance.

[Codegen] Generate fast fp64-to-fp16 conversions in unsafe mode.ClosedPublic

Details

Diff Detail

Unit TestsFailed

Event Timeline

Revision Contents

Diff 537417

llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h

llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp

llvm/lib/Target/AMDGPU/VOP1Instructions.td

llvm/test/CodeGen/AMDGPU/fptrunc.f16.ll

llvm/test/CodeGen/AMDGPU/fptrunc.ll

[Codegen] Generate fast fp64-to-fp16 conversions in unsafe mode.
ClosedPublic