Diff 64421

include/llvm/IR/IntrinsicsAMDGPU.td

Context not available.
	// llvm.amdgcn.ds.swizzle src offset	// llvm.amdgcn.ds.swizzle src offset
	def int_amdgcn_ds_swizzle :	def int_amdgcn_ds_swizzle :
	GCCBuiltin<"__builtin_amdgcn_ds_swizzle">,	GCCBuiltin<"__builtin_amdgcn_ds_swizzle">,
	Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem, IntrConvergent]>;	Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem, IntrConvergent]>;

	// llvm.amdgcn.lerp	// llvm.amdgcn.lerp
	def int_amdgcn_lerp :	def int_amdgcn_lerp :
	GCCBuiltin<"__builtin_amdgcn_lerp">,	GCCBuiltin<"__builtin_amdgcn_lerp">,
	Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;	Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;

		// llvm.amdgcn.readfirlane src
		arsenm (inline comment; unsubmitted, not done): Comment typo and not needed
		def int_amdgcn_readfirstlane :
		GCCBuiltin<"__builtin_amdgcn_readfirstlane">,
		Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem, IntrConvergent]>;

		// llvm.amdgcn.readlane
		arsenm (inline comment; unsubmitted, not done): Ditto
		def int_amdgcn_readlane :
		GCCBuiltin<"__builtin_amdgcn_readlane">,
		Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem, IntrConvergent]>;

	//===----------------------------------------------------------------------===//	//===----------------------------------------------------------------------===//
	// CI+ Intrinsics	// CI+ Intrinsics
	//===----------------------------------------------------------------------===//	//===----------------------------------------------------------------------===//

	def int_amdgcn_s_dcache_inv_vol :	def int_amdgcn_s_dcache_inv_vol :
	GCCBuiltin<"__builtin_amdgcn_s_dcache_inv_vol">,	GCCBuiltin<"__builtin_amdgcn_s_dcache_inv_vol">,
	Intrinsic<[], [], []>;	Intrinsic<[], [], []>;

	def int_amdgcn_buffer_wbinvl1_vol :	def int_amdgcn_buffer_wbinvl1_vol :
	GCCBuiltin<"__builtin_amdgcn_buffer_wbinvl1_vol">,	GCCBuiltin<"__builtin_amdgcn_buffer_wbinvl1_vol">,
Context not available.

lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h

Context not available.

	MCOperand errOperand(unsigned V, const llvm::Twine& ErrMsg) const;	MCOperand errOperand(unsigned V, const llvm::Twine& ErrMsg) const;

	DecodeStatus tryDecodeInst(const uint8_t* Table,	DecodeStatus tryDecodeInst(const uint8_t* Table,
	MCInst &MI,	MCInst &MI,
	uint64_t Inst,	uint64_t Inst,
	uint64_t Address) const;	uint64_t Address) const;

	MCOperand decodeOperand_VGPR_32(unsigned Val) const;	MCOperand decodeOperand_VGPR_32(unsigned Val) const;
	MCOperand decodeOperand_VS_32(unsigned Val) const;	MCOperand decodeOperand_VS_32(unsigned Val) const;
		MCOperand decodeOperand_VM0_32(unsigned Val) const;
	MCOperand decodeOperand_VS_64(unsigned Val) const;	MCOperand decodeOperand_VS_64(unsigned Val) const;

	MCOperand decodeOperand_VReg_64(unsigned Val) const;	MCOperand decodeOperand_VReg_64(unsigned Val) const;
	MCOperand decodeOperand_VReg_96(unsigned Val) const;	MCOperand decodeOperand_VReg_96(unsigned Val) const;
	MCOperand decodeOperand_VReg_128(unsigned Val) const;	MCOperand decodeOperand_VReg_128(unsigned Val) const;

	MCOperand decodeOperand_SReg_32(unsigned Val) const;	MCOperand decodeOperand_SReg_32(unsigned Val) const;
	MCOperand decodeOperand_SReg_32_XM0(unsigned Val) const;	MCOperand decodeOperand_SReg_32_XM0(unsigned Val) const;
	MCOperand decodeOperand_SReg_64(unsigned Val) const;	MCOperand decodeOperand_SReg_64(unsigned Val) const;
	MCOperand decodeOperand_SReg_128(unsigned Val) const;	MCOperand decodeOperand_SReg_128(unsigned Val) const;
Context not available.

lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp

Context not available.
	uint64_t /*Addr*/, \	uint64_t /*Addr*/, \
	const void *Decoder) { \	const void *Decoder) { \
	auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder); \	auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder); \
	return addOperand(Inst, DAsm->decodeOperand_##DecName(Imm)); \	return addOperand(Inst, DAsm->decodeOperand_##DecName(Imm)); \
	}	}

	#define DECODE_OPERAND(RegClass) DECODE_OPERAND2(RegClass, RegClass)	#define DECODE_OPERAND(RegClass) DECODE_OPERAND2(RegClass, RegClass)

	DECODE_OPERAND(VGPR_32)	DECODE_OPERAND(VGPR_32)
	DECODE_OPERAND(VS_32)	DECODE_OPERAND(VS_32)
		DECODE_OPERAND(VM0_32)
	DECODE_OPERAND(VS_64)	DECODE_OPERAND(VS_64)

	DECODE_OPERAND(VReg_64)	DECODE_OPERAND(VReg_64)
	DECODE_OPERAND(VReg_96)	DECODE_OPERAND(VReg_96)
	DECODE_OPERAND(VReg_128)	DECODE_OPERAND(VReg_128)

	DECODE_OPERAND(SReg_32)	DECODE_OPERAND(SReg_32)
	DECODE_OPERAND(SReg_32_XM0)	DECODE_OPERAND(SReg_32_XM0)
	DECODE_OPERAND(SReg_64)	DECODE_OPERAND(SReg_64)
	DECODE_OPERAND(SReg_128)	DECODE_OPERAND(SReg_128)
Context not available.
	if (Val % (1 << shift))	if (Val % (1 << shift))
	*CommentStream << "Warning: " << getRegClassName(SRegClassID)	*CommentStream << "Warning: " << getRegClassName(SRegClassID)
	<< ": scalar reg isn't aligned " << Val;	<< ": scalar reg isn't aligned " << Val;
	return createRegOperand(SRegClassID, Val >> shift);	return createRegOperand(SRegClassID, Val >> shift);
	}	}

	MCOperand AMDGPUDisassembler::decodeOperand_VS_32(unsigned Val) const {	MCOperand AMDGPUDisassembler::decodeOperand_VS_32(unsigned Val) const {
	return decodeSrcOp(OPW32, Val);	return decodeSrcOp(OPW32, Val);
	}	}

		MCOperand AMDGPUDisassembler::decodeOperand_VM0_32(unsigned Val) const {
		return decodeSrcOp(OPW32, Val);
		}

	MCOperand AMDGPUDisassembler::decodeOperand_VS_64(unsigned Val) const {	MCOperand AMDGPUDisassembler::decodeOperand_VS_64(unsigned Val) const {
	return decodeSrcOp(OPW64, Val);	return decodeSrcOp(OPW64, Val);
	}	}

	MCOperand AMDGPUDisassembler::decodeOperand_VGPR_32(unsigned Val) const {	MCOperand AMDGPUDisassembler::decodeOperand_VGPR_32(unsigned Val) const {
	return createRegOperand(AMDGPU::VGPR_32RegClassID, Val);	return createRegOperand(AMDGPU::VGPR_32RegClassID, Val);
	}	}

	MCOperand AMDGPUDisassembler::decodeOperand_VReg_64(unsigned Val) const {	MCOperand AMDGPUDisassembler::decodeOperand_VReg_64(unsigned Val) const {
	return createRegOperand(AMDGPU::VReg_64RegClassID, Val);	return createRegOperand(AMDGPU::VReg_64RegClassID, Val);
Context not available.

lib/Target/AMDGPU/SIInstructions.td

Context not available.
	defm V_MOV_B32 : VOP1Inst <vop1<0x1>, "v_mov_b32", VOP_I32_I32>;	defm V_MOV_B32 : VOP1Inst <vop1<0x1>, "v_mov_b32", VOP_I32_I32>;
	} // End isMoveImm = 1	} // End isMoveImm = 1

	let Uses = [EXEC] in {	let Uses = [EXEC] in {

	// FIXME: Specify SchedRW for READFIRSTLANE_B32	// FIXME: Specify SchedRW for READFIRSTLANE_B32

	def V_READFIRSTLANE_B32 : VOP1 <	def V_READFIRSTLANE_B32 : VOP1 <
	0x00000002,	0x00000002,
	(outs SReg_32:$vdst),	(outs SReg_32:$vdst),
	(ins VS_32:$src0),	(ins VM0_32:$src0),
	"v_readfirstlane_b32 $vdst, $src0",	"v_readfirstlane_b32 $vdst, $src0",
	[]	[]
		tstellarAMD (inline comment; unsubmitted, not done): You should use the type in the pattern rather than the register class.
	> {	> {
	let isConvergent = 1;	let isConvergent = 1;
	}	}

	}	}

	let SchedRW = [WriteQuarterRate32] in {	let SchedRW = [WriteQuarterRate32] in {

Context not available.

	} // End isCommutable = 1	} // End isCommutable = 1

	// These are special and do not read the exec mask.	// These are special and do not read the exec mask.
	let isConvergent = 1, Uses = []<Register> in {	let isConvergent = 1, Uses = []<Register> in {

	defm V_READLANE_B32 : VOP2SI_3VI_m <	defm V_READLANE_B32 : VOP2SI_3VI_m <
	vop3 <0x001, 0x289>,	vop3 <0x001, 0x289>,
	"v_readlane_b32",	"v_readlane_b32",
	(outs SReg_32:$vdst),	(outs SReg_32:$vdst),
	(ins VS_32:$src0, SCSrc_32:$src1),	(ins VM0_32:$src0, SCSrc_32:$src1),
	"v_readlane_b32 $vdst, $src0, $src1"	"v_readlane_b32 $vdst, $src0, $src1"
	>;	>;
		tstellarAMD (inline comment; unsubmitted, not done): Same here.

	defm V_WRITELANE_B32 : VOP2SI_3VI_m <	defm V_WRITELANE_B32 : VOP2SI_3VI_m <
	vop3 <0x002, 0x28a>,	vop3 <0x002, 0x28a>,
	"v_writelane_b32",	"v_writelane_b32",
	(outs VGPR_32:$vdst),	(outs VGPR_32:$vdst),
	(ins SReg_32:$src0, SCSrc_32:$src1),	(ins SReg_32:$src0, SCSrc_32:$src1),
	"v_writelane_b32 $vdst, $src0, $src1"	"v_writelane_b32 $vdst, $src0, $src1"
	>;	>;
Context not available.

	//===----------------------------------------------------------------------===//	//===----------------------------------------------------------------------===//
	// DS_SWIZZLE Intrinsic Pattern.	// DS_SWIZZLE Intrinsic Pattern.
	//===----------------------------------------------------------------------===//	//===----------------------------------------------------------------------===//
	def : Pat <	def : Pat <
	(int_amdgcn_ds_swizzle i32:$src, imm:$offset16),	(int_amdgcn_ds_swizzle i32:$src, imm:$offset16),
	(DS_SWIZZLE_B32 $src, (as_i16imm $offset16), (i1 0))	(DS_SWIZZLE_B32 $src, (as_i16imm $offset16), (i1 0))
	>;	>;

	//===----------------------------------------------------------------------===//	//===----------------------------------------------------------------------===//
		// READFIRSTLANE Intrinsic Pattern.
		tstellarAMD (inline comment; unsubmitted, not done): Extra whitespace.
		//===----------------------------------------------------------------------===//
		def : Pat <
		(int_amdgcn_readfirstlane i32:$src),
		(V_READFIRSTLANE_B32 $src)
		arsenm (inline comment; unsubmitted, not done): This is a very simple pattern so should go with the instruction definition's pattern
		>;

		//===----------------------------------------------------------------------===//
		// READLANE Intrinsic Pattern.
		//===----------------------------------------------------------------------===//
		def : Pat <
		(int_amdgcn_readlane i32:$src0, i32:$src1),
		(V_READLANE_B32 $src0, $src1)
		arsenm (inline comment; unsubmitted, not done): Ditto
		>;

		//===----------------------------------------------------------------------===//
	// SMRD Patterns	// SMRD Patterns
	//===----------------------------------------------------------------------===//	//===----------------------------------------------------------------------===//

	multiclass SMRD_Pattern <string Instr, ValueType vt> {	multiclass SMRD_Pattern <string Instr, ValueType vt> {

	// 1. IMM offset	// 1. IMM offset
	def : Pat <	def : Pat <
	(smrd_load (SMRDImm i64:$sbase, i32:$offset)),	(smrd_load (SMRDImm i64:$sbase, i32:$offset)),
	(vt (!cast<SMRD>(Instr#"_IMM") $sbase, $offset))	(vt (!cast<SMRD>(Instr#"_IMM") $sbase, $offset))
	>;	>;
Context not available.

lib/Target/AMDGPU/SIRegisterInfo.td

Context not available.
	def SCSrc_32 : RegInlineOperand<SReg_32> {	def SCSrc_32 : RegInlineOperand<SReg_32> {
	let ParserMatchClass = RegImmMatcher<"SCSrc32">;	let ParserMatchClass = RegImmMatcher<"SCSrc32">;
	}	}

	//===----------------------------------------------------------------------===//	//===----------------------------------------------------------------------===//
	// VSrc_* Operands with an SGPR, VGPR or a 32-bit immediate	// VSrc_* Operands with an SGPR, VGPR or a 32-bit immediate
	//===----------------------------------------------------------------------===//	//===----------------------------------------------------------------------===//

	def VS_32 : RegisterClass<"AMDGPU", [i32, f32], 32, (add VGPR_32, SReg_32)>;	def VS_32 : RegisterClass<"AMDGPU", [i32, f32], 32, (add VGPR_32, SReg_32)>;

		def VM0_32 : RegisterClass<"AMDGPU", [i32, f32], 32, (add VGPR_32, M0)> {
		let isAllocatable = 0;
		}

	def VS_64 : RegisterClass<"AMDGPU", [i64, f64], 32, (add VReg_64, SReg_64)> {	def VS_64 : RegisterClass<"AMDGPU", [i64, f64], 32, (add VReg_64, SReg_64)> {
	let CopyCost = 2;	let CopyCost = 2;
	}	}

	def VSrc_32 : RegisterOperand<VS_32> {	def VSrc_32 : RegisterOperand<VS_32> {
	let OperandNamespace = "AMDGPU";	let OperandNamespace = "AMDGPU";
	let OperandType = "OPERAND_REG_IMM32";	let OperandType = "OPERAND_REG_IMM32";
	let ParserMatchClass = RegImmMatcher<"VSrc32">;	let ParserMatchClass = RegImmMatcher<"VSrc32">;
	}	}

Context not available.

test/CodeGen/AMDGPU/llvm.amdgcn.readfirstlane.ll

This file was added.

				; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=fiji -verify-machineinstrs < %s | FileCheck %s

				declare i32 @llvm.amdgcn.readfirstlane(i32) #0

				; CHECK-LABEL: {{^}}test_readfirstlane:
				; CHECK: v_readfirstlane_b32 s{{[0-9]+}}, v{{[0-9]+}}
				define void @test_readfirstlane(i32 addrspace(1)* %out, i32 %src) nounwind {
				%readfirstlane = call i32 @llvm.amdgcn.readfirstlane(i32 %src) #0
				store i32 %readfirstlane, i32 addrspace(1)* %out, align 4
				ret void
				}
				arsenm (inline comment; unsubmitted, not done): Should also include a test which has an immediate source to make sure that it is moved into a register. Another that uses inline asm to put a value in m0 would also be useful (same for the other intrinsic too)

				attributes #0 = { nounwind readnone convergent }

test/CodeGen/AMDGPU/llvm.amdgcn.readlane.ll

This file was added.

				; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=fiji -verify-machineinstrs < %s | FileCheck %s

				declare i32 @llvm.amdgcn.readlane(i32, i32) #0

				; CHECK-LABEL: {{^}}readlane_sreg:
				; CHECK: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, s{{[0-9]+}}
				define void @readlane_sreg(i32 addrspace(1)* %out, i32 %src0, i32 %src1) nounwind {
				%readlane = call i32 @llvm.amdgcn.readlane(i32 %src0, i32 %src1) #0
				arsenm (inline comment; unsubmitted, not done): Attributes not needed on call site
				store i32 %readlane, i32 addrspace(1)* %out, align 4
				ret void
				}

				; CHECK-LABEL: {{^}}readlane_imm:
				; CHECK: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 32
				define void @readlane_imm(i32 addrspace(1)* %out, i32 %src0) nounwind {
				arsenm (inline comment; unsubmitted, not done): Use attribute group for the nounwind also
				%readlane = call i32 @llvm.amdgcn.readlane(i32 %src0, i32 32) #0
				store i32 %readlane, i32 addrspace(1)* %out, align 4
				ret void
				}

				attributes #0 = { nounwind readnone convergent }

This is an archive of the discontinued LLVM Phabricator instance.

AMDGPU/SI: Implement readlane/readfirstlane intrinsics to expose the instructions.
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 64421

include/llvm/IR/IntrinsicsAMDGPU.td

lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h

lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp

lib/Target/AMDGPU/SIInstructions.td

lib/Target/AMDGPU/SIRegisterInfo.td

test/CodeGen/AMDGPU/llvm.amdgcn.readfirstlane.ll

test/CodeGen/AMDGPU/llvm.amdgcn.readlane.ll

This is an archive of the discontinued LLVM Phabricator instance.

AMDGPU/SI: Implement readlane/readfirstlane intrinsics to expose the instructions.
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 64421

include/llvm/IR/IntrinsicsAMDGPU.td

lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h

lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp

lib/Target/AMDGPU/SIInstructions.td

lib/Target/AMDGPU/SIRegisterInfo.td

test/CodeGen/AMDGPU/llvm.amdgcn.readfirstlane.ll

test/CodeGen/AMDGPU/llvm.amdgcn.readlane.ll

AMDGPU/SI: Implement readlane/readfirstlane intrinsics to expose the instructions.
ClosedPublic