This is an archive of the discontinued LLVM Phabricator instance.

llvm/test/CodeGen/AMDGPU/v_cmp_gfx11.ll
7	The assembly mnemonics do not include references to true16, only the MachineInstr/MCInst names do. I checked that the True16 version of the instruction was used via print-after-all before asm emission in this case.

Leonc added inline comments.Oct 11 2022, 7:27 AM

llvm/test/CodeGen/AMDGPU/v_cmp_gfx11.ll
7	Excellent. Thanks 👍

arsenm added inline comments.Oct 11 2022, 7:46 AM

llvm/test/CodeGen/AMDGPU/v_cmp_gfx11.ll
16	Should also test llvm.amdgcn.fcmp. I'd assume that's missing coverage too if this was broken
16	Plus ballot

Joe_Nash added inline comments.Oct 12 2022, 7:15 AM

llvm/test/CodeGen/AMDGPU/v_cmp_gfx11.ll
16	Is ballot well defined for 16-bit destinations? I think ballot should work over the waveSize, so only 32- or 64-bit returns are valid. For an intrinsic like `declare i16 @llvm.amdgcn.ballot.i16(i1)`, I would expect a problem at AMDGPUInstructionSelector.cpp:1226, in AMDGPUInstructionSelector::selectBallot: `if (Size != STI.getWavefrontSize()) return false;`

Joe_Nash marked an inline comment as not done.Oct 12 2022, 7:56 AM

Joe_Nash added inline comments.

llvm/test/CodeGen/AMDGPU/v_cmp_gfx11.ll
16	On second thought, I think you probably meant a test of a 16 bit compare feeding into ballot. See test https://reviews.llvm.org/D135782

Revision Contents

Path

Size

llvm/

lib/

Target/

AMDGPU/

VOPCInstructions.td

48 lines

test/

CodeGen/

AMDGPU/

v_cmp_gfx11.ll

26 lines

Diff 466603

llvm/lib/Target/AMDGPU/VOPCInstructions.td

	Show First 20 Lines • Show All 1,018 Lines • ▼ Show 20 Lines
	defm : ICMP_Pattern <COND_UGE, V_CMP_GE_U64_e64, i64>;			defm : ICMP_Pattern <COND_UGE, V_CMP_GE_U64_e64, i64>;
	defm : ICMP_Pattern <COND_ULT, V_CMP_LT_U64_e64, i64>;			defm : ICMP_Pattern <COND_ULT, V_CMP_LT_U64_e64, i64>;
	defm : ICMP_Pattern <COND_ULE, V_CMP_LE_U64_e64, i64>;			defm : ICMP_Pattern <COND_ULE, V_CMP_LE_U64_e64, i64>;
	defm : ICMP_Pattern <COND_SGT, V_CMP_GT_I64_e64, i64>;			defm : ICMP_Pattern <COND_SGT, V_CMP_GT_I64_e64, i64>;
	defm : ICMP_Pattern <COND_SGE, V_CMP_GE_I64_e64, i64>;			defm : ICMP_Pattern <COND_SGE, V_CMP_GE_I64_e64, i64>;
	defm : ICMP_Pattern <COND_SLT, V_CMP_LT_I64_e64, i64>;			defm : ICMP_Pattern <COND_SLT, V_CMP_LT_I64_e64, i64>;
	defm : ICMP_Pattern <COND_SLE, V_CMP_LE_I64_e64, i64>;			defm : ICMP_Pattern <COND_SLE, V_CMP_LE_I64_e64, i64>;

				let OtherPredicates = [HasTrue16BitInsts] in {
				defm : ICMP_Pattern <COND_EQ, V_CMP_EQ_U16_t16_e64, i16>;
				defm : ICMP_Pattern <COND_NE, V_CMP_NE_U16_t16_e64, i16>;
				defm : ICMP_Pattern <COND_UGT, V_CMP_GT_U16_t16_e64, i16>;
				defm : ICMP_Pattern <COND_UGE, V_CMP_GE_U16_t16_e64, i16>;
				defm : ICMP_Pattern <COND_ULT, V_CMP_LT_U16_t16_e64, i16>;
				defm : ICMP_Pattern <COND_ULE, V_CMP_LE_U16_t16_e64, i16>;
				defm : ICMP_Pattern <COND_SGT, V_CMP_GT_I16_t16_e64, i16>;
				defm : ICMP_Pattern <COND_SGE, V_CMP_GE_I16_t16_e64, i16>;
				defm : ICMP_Pattern <COND_SLT, V_CMP_LT_I16_t16_e64, i16>;
				defm : ICMP_Pattern <COND_SLE, V_CMP_LE_I16_t16_e64, i16>;
				} // End OtherPredicates = [HasTrue16BitInsts]

				let OtherPredicates = [NotHasTrue16BitInsts] in {
	defm : ICMP_Pattern <COND_EQ, V_CMP_EQ_U16_e64, i16>;			defm : ICMP_Pattern <COND_EQ, V_CMP_EQ_U16_e64, i16>;
	defm : ICMP_Pattern <COND_NE, V_CMP_NE_U16_e64, i16>;			defm : ICMP_Pattern <COND_NE, V_CMP_NE_U16_e64, i16>;
	defm : ICMP_Pattern <COND_UGT, V_CMP_GT_U16_e64, i16>;			defm : ICMP_Pattern <COND_UGT, V_CMP_GT_U16_e64, i16>;
	defm : ICMP_Pattern <COND_UGE, V_CMP_GE_U16_e64, i16>;			defm : ICMP_Pattern <COND_UGE, V_CMP_GE_U16_e64, i16>;
	defm : ICMP_Pattern <COND_ULT, V_CMP_LT_U16_e64, i16>;			defm : ICMP_Pattern <COND_ULT, V_CMP_LT_U16_e64, i16>;
	defm : ICMP_Pattern <COND_ULE, V_CMP_LE_U16_e64, i16>;			defm : ICMP_Pattern <COND_ULE, V_CMP_LE_U16_e64, i16>;
	defm : ICMP_Pattern <COND_SGT, V_CMP_GT_I16_e64, i16>;			defm : ICMP_Pattern <COND_SGT, V_CMP_GT_I16_e64, i16>;
	defm : ICMP_Pattern <COND_SGE, V_CMP_GE_I16_e64, i16>;			defm : ICMP_Pattern <COND_SGE, V_CMP_GE_I16_e64, i16>;
	defm : ICMP_Pattern <COND_SLT, V_CMP_LT_I16_e64, i16>;			defm : ICMP_Pattern <COND_SLT, V_CMP_LT_I16_e64, i16>;
	defm : ICMP_Pattern <COND_SLE, V_CMP_LE_I16_e64, i16>;			defm : ICMP_Pattern <COND_SLE, V_CMP_LE_I16_e64, i16>;
				} // End OtherPredicates = [NotHasTrue16BitInsts]

	multiclass FCMP_Pattern <PatFrags cond, Instruction inst, ValueType vt> {			multiclass FCMP_Pattern <PatFrags cond, Instruction inst, ValueType vt> {
	let WaveSizePredicate = isWave64 in			let WaveSizePredicate = isWave64 in
	def : GCNPat <			def : GCNPat <
	(i64 (AMDGPUsetcc (vt (VOP3Mods vt:$src0, i32:$src0_modifiers)),			(i64 (AMDGPUsetcc (vt (VOP3Mods vt:$src0, i32:$src0_modifiers)),
	(vt (VOP3Mods vt:$src1, i32:$src1_modifiers)), cond)),			(vt (VOP3Mods vt:$src1, i32:$src1_modifiers)), cond)),
	(i64 (COPY_TO_REGCLASS (inst $src0_modifiers, $src0, $src1_modifiers, $src1,			(i64 (COPY_TO_REGCLASS (inst $src0_modifiers, $src0, $src1_modifiers, $src1,
	DSTCLAMP.NONE), SReg_64))			DSTCLAMP.NONE), SReg_64))
	Show All 17 Lines

	defm : FCMP_Pattern <COND_OEQ, V_CMP_EQ_F64_e64, f64>;			defm : FCMP_Pattern <COND_OEQ, V_CMP_EQ_F64_e64, f64>;
	defm : FCMP_Pattern <COND_ONE, V_CMP_NEQ_F64_e64, f64>;			defm : FCMP_Pattern <COND_ONE, V_CMP_NEQ_F64_e64, f64>;
	defm : FCMP_Pattern <COND_OGT, V_CMP_GT_F64_e64, f64>;			defm : FCMP_Pattern <COND_OGT, V_CMP_GT_F64_e64, f64>;
	defm : FCMP_Pattern <COND_OGE, V_CMP_GE_F64_e64, f64>;			defm : FCMP_Pattern <COND_OGE, V_CMP_GE_F64_e64, f64>;
	defm : FCMP_Pattern <COND_OLT, V_CMP_LT_F64_e64, f64>;			defm : FCMP_Pattern <COND_OLT, V_CMP_LT_F64_e64, f64>;
	defm : FCMP_Pattern <COND_OLE, V_CMP_LE_F64_e64, f64>;			defm : FCMP_Pattern <COND_OLE, V_CMP_LE_F64_e64, f64>;

	defm : FCMP_Pattern <COND_OEQ, V_CMP_EQ_F16_e64, f16>;
	defm : FCMP_Pattern <COND_ONE, V_CMP_NEQ_F16_e64, f16>;
	defm : FCMP_Pattern <COND_OGT, V_CMP_GT_F16_e64, f16>;
	defm : FCMP_Pattern <COND_OGE, V_CMP_GE_F16_e64, f16>;
	defm : FCMP_Pattern <COND_OLT, V_CMP_LT_F16_e64, f16>;
	defm : FCMP_Pattern <COND_OLE, V_CMP_LE_F16_e64, f16>;


	defm : FCMP_Pattern <COND_UEQ, V_CMP_NLG_F32_e64, f32>;			defm : FCMP_Pattern <COND_UEQ, V_CMP_NLG_F32_e64, f32>;
	defm : FCMP_Pattern <COND_UNE, V_CMP_NEQ_F32_e64, f32>;			defm : FCMP_Pattern <COND_UNE, V_CMP_NEQ_F32_e64, f32>;
	defm : FCMP_Pattern <COND_UGT, V_CMP_NLE_F32_e64, f32>;			defm : FCMP_Pattern <COND_UGT, V_CMP_NLE_F32_e64, f32>;
	defm : FCMP_Pattern <COND_UGE, V_CMP_NLT_F32_e64, f32>;			defm : FCMP_Pattern <COND_UGE, V_CMP_NLT_F32_e64, f32>;
	defm : FCMP_Pattern <COND_ULT, V_CMP_NGE_F32_e64, f32>;			defm : FCMP_Pattern <COND_ULT, V_CMP_NGE_F32_e64, f32>;
	defm : FCMP_Pattern <COND_ULE, V_CMP_NGT_F32_e64, f32>;			defm : FCMP_Pattern <COND_ULE, V_CMP_NGT_F32_e64, f32>;

	defm : FCMP_Pattern <COND_UEQ, V_CMP_NLG_F64_e64, f64>;			defm : FCMP_Pattern <COND_UEQ, V_CMP_NLG_F64_e64, f64>;
	defm : FCMP_Pattern <COND_UNE, V_CMP_NEQ_F64_e64, f64>;			defm : FCMP_Pattern <COND_UNE, V_CMP_NEQ_F64_e64, f64>;
	defm : FCMP_Pattern <COND_UGT, V_CMP_NLE_F64_e64, f64>;			defm : FCMP_Pattern <COND_UGT, V_CMP_NLE_F64_e64, f64>;
	defm : FCMP_Pattern <COND_UGE, V_CMP_NLT_F64_e64, f64>;			defm : FCMP_Pattern <COND_UGE, V_CMP_NLT_F64_e64, f64>;
	defm : FCMP_Pattern <COND_ULT, V_CMP_NGE_F64_e64, f64>;			defm : FCMP_Pattern <COND_ULT, V_CMP_NGE_F64_e64, f64>;
	defm : FCMP_Pattern <COND_ULE, V_CMP_NGT_F64_e64, f64>;			defm : FCMP_Pattern <COND_ULE, V_CMP_NGT_F64_e64, f64>;

				let OtherPredicates = [HasTrue16BitInsts] in {
				defm : FCMP_Pattern <COND_OEQ, V_CMP_EQ_F16_t16_e64, f16>;
				defm : FCMP_Pattern <COND_ONE, V_CMP_NEQ_F16_t16_e64, f16>;
				defm : FCMP_Pattern <COND_OGT, V_CMP_GT_F16_t16_e64, f16>;
				defm : FCMP_Pattern <COND_OGE, V_CMP_GE_F16_t16_e64, f16>;
				defm : FCMP_Pattern <COND_OLT, V_CMP_LT_F16_t16_e64, f16>;
				defm : FCMP_Pattern <COND_OLE, V_CMP_LE_F16_t16_e64, f16>;

				defm : FCMP_Pattern <COND_UEQ, V_CMP_NLG_F16_t16_e64, f16>;
				defm : FCMP_Pattern <COND_UNE, V_CMP_NEQ_F16_t16_e64, f16>;
				defm : FCMP_Pattern <COND_UGT, V_CMP_NLE_F16_t16_e64, f16>;
				defm : FCMP_Pattern <COND_UGE, V_CMP_NLT_F16_t16_e64, f16>;
				defm : FCMP_Pattern <COND_ULT, V_CMP_NGE_F16_t16_e64, f16>;
				defm : FCMP_Pattern <COND_ULE, V_CMP_NGT_F16_t16_e64, f16>;
				} // End OtherPredicates = [HasTrue16BitInsts]

				let OtherPredicates = [NotHasTrue16BitInsts] in {
				defm : FCMP_Pattern <COND_OEQ, V_CMP_EQ_F16_e64, f16>;
				defm : FCMP_Pattern <COND_ONE, V_CMP_NEQ_F16_e64, f16>;
				defm : FCMP_Pattern <COND_OGT, V_CMP_GT_F16_e64, f16>;
				defm : FCMP_Pattern <COND_OGE, V_CMP_GE_F16_e64, f16>;
				defm : FCMP_Pattern <COND_OLT, V_CMP_LT_F16_e64, f16>;
				defm : FCMP_Pattern <COND_OLE, V_CMP_LE_F16_e64, f16>;

	defm : FCMP_Pattern <COND_UEQ, V_CMP_NLG_F16_e64, f16>;			defm : FCMP_Pattern <COND_UEQ, V_CMP_NLG_F16_e64, f16>;
	defm : FCMP_Pattern <COND_UNE, V_CMP_NEQ_F16_e64, f16>;			defm : FCMP_Pattern <COND_UNE, V_CMP_NEQ_F16_e64, f16>;
	defm : FCMP_Pattern <COND_UGT, V_CMP_NLE_F16_e64, f16>;			defm : FCMP_Pattern <COND_UGT, V_CMP_NLE_F16_e64, f16>;
	defm : FCMP_Pattern <COND_UGE, V_CMP_NLT_F16_e64, f16>;			defm : FCMP_Pattern <COND_UGE, V_CMP_NLT_F16_e64, f16>;
	defm : FCMP_Pattern <COND_ULT, V_CMP_NGE_F16_e64, f16>;			defm : FCMP_Pattern <COND_ULT, V_CMP_NGE_F16_e64, f16>;
	defm : FCMP_Pattern <COND_ULE, V_CMP_NGT_F16_e64, f16>;			defm : FCMP_Pattern <COND_ULE, V_CMP_NGT_F16_e64, f16>;
				} // End OtherPredicates = [NotHasTrue16BitInsts]

	//===----------------------------------------------------------------------===//			//===----------------------------------------------------------------------===//
	// DPP Encodings			// DPP Encodings
	//===----------------------------------------------------------------------===//			//===----------------------------------------------------------------------===//

	// VOPC32			// VOPC32

	class VOPC_DPPe_Common<bits<8> op> : Enc64 {			class VOPC_DPPe_Common<bits<8> op> : Enc64 {
	▲ Show 20 Lines • Show All 1,273 Lines • Show Last 20 Lines

llvm/test/CodeGen/AMDGPU/v_cmp_gfx11.ll

This file was added.

				; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
				; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefixes=CHECK %s

				define amdgpu_kernel void @test() {
				arsenmUnsubmitted Not Done Reply Inline Actions Can replace -march with -mtriple arsenm: Can replace -march with -mtriple
				; CHECK-LABEL: test:
				; CHECK: ; %bb.0: ; %entry
				; CHECK-NEXT: v_cmp_eq_u16_e64 s0, 0, 0
				LeoncUnsubmitted Not Done Reply Inline Actions Shouldn't this be `v_cmp_eq_u16_t16_e64` on gfx11? Leonc: Shouldn't this be `v_cmp_eq_u16_t16_e64` on gfx11?
				Joe_NashAuthorUnsubmitted Done Reply Inline Actions The assembly mnemonics do not include references to true16, only the MachineInstr/MCInst names do. I checked that the True16 version of the instruction was used via print-after-all before asm emission in this case. Joe_Nash: The assembly mnemonics do not include references to true16, only the MachineInstr/MCInst names…
				LeoncUnsubmitted Done Reply Inline Actions Excellent. Thanks 👍 Leonc: Excellent. Thanks 👍
				; CHECK-NEXT: v_mov_b32_e32 v1, 0
				; CHECK-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
				; CHECK-NEXT: s_cmp_eq_u32 s0, 0
				; CHECK-NEXT: s_cselect_b32 s0, -1, 0
				; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
				; CHECK-NEXT: ds_store_b32 v1, v0
				; CHECK-NEXT: s_endpgm
				entry:
				%0 = tail call i64 @llvm.amdgcn.icmp.i64.i16(i16 0, i16 0, i32 32)
				arsenmUnsubmitted Not Done Reply Inline Actions Should also test llvm.amdgcn.fcmp. I'd assume that's missing coverage too if this was broken arsenm: Should also test llvm.amdgcn.fcmp. I'd assume that's missing coverage too if this was broken
				arsenmUnsubmitted Not Done Reply Inline Actions Plus ballot arsenm: Plus ballot
				Joe_NashAuthorUnsubmitted Done Reply Inline Actions Is ballot well defined for 16 bit destinations? I think ballot should work over the waveSize, so only 32 or 64 bit returns are valid. For an intrinsic like this declare i16 @llvm.amdgcn.ballot.i16(i1) I would expect a problem here AMDGPUInstructionSelector.cpp:1226 AMDGPUInstructionSelector::selectBallot if (Size != STI.getWavefrontSize()) return false; Joe_Nash: Is ballot well defined for 16 bit destinations? I think ballot should work over the waveSize…
				Joe_NashAuthorUnsubmitted Done Reply Inline Actions On second thought, I think you probably meant a test of a 16 bit compare feeding into ballot. See test https://reviews.llvm.org/D135782 Joe_Nash: On second thought, I think you probably meant a test of a 16 bit compare feeding into ballot.
				%cmp0 = icmp eq i64 %0, 0
				%add0 = zext i1 %cmp0 to i32
				store i32 %add0, ptr addrspace(3) null, align 2147483648
				ret void
				}

				; Function Attrs: convergent nounwind readnone willreturn
				declare i64 @llvm.amdgcn.icmp.i64.i16(i16, i16, i32 immarg) #0

				attributes #0 = { convergent nounwind readnone willreturn }

This is an archive of the discontinued LLVM Phabricator instance.

[AMDGPU] Fix True16 patterns for cmp on GFX11 (Closed, Public)

Details

Diff Detail

Event Timeline

Revision Contents

Diff 466603

llvm/lib/Target/AMDGPU/VOPCInstructions.td

llvm/test/CodeGen/AMDGPU/v_cmp_gfx11.ll

[AMDGPU] Fix True16 patterns for cmp on GFX11
ClosedPublic