Diff 115694

include/llvm/IR/IntrinsicsAMDGPU.td

Show First 20 Lines • Show All 288 Lines • ▼ Show 20 Lines	class AMDGPUAtomicIncIntrin : Intrinsic<[llvm_anyint_ty],
llvm_i32_ty, // scope		llvm_i32_ty, // scope
llvm_i1_ty], // isVolatile		llvm_i1_ty], // isVolatile
[IntrArgMemOnly, NoCapture<0>]		[IntrArgMemOnly, NoCapture<0>]
>;		>;

def int_amdgcn_atomic_inc : AMDGPUAtomicIncIntrin;		def int_amdgcn_atomic_inc : AMDGPUAtomicIncIntrin;
def int_amdgcn_atomic_dec : AMDGPUAtomicIncIntrin;		def int_amdgcn_atomic_dec : AMDGPUAtomicIncIntrin;

		// Shape shared by the value-returning f32 atomic intrinsics defined from this
		// class: returns a float and takes (pointer to float in address space 3,
		// float operand). Marked as accessing only argument memory and as not
		// capturing the pointer argument (argument 0).
		// NOTE(review): unlike AMDGPUAtomicIncIntrin, no ordering/scope/isVolatile
		// operands are declared here.
		class AMDGPUAtomicF32Intrin : Intrinsic<[llvm_float_ty],
		[LLVMQualPointerType<llvm_float_ty, 3>, llvm_float_ty],
		[IntrArgMemOnly, NoCapture<0>]
		>;
		t-tye [Unsubmitted, Done]: Need to add the same fields as for AMDGPUAtomicIncIntrin, namely: `llvm_i32_ty, // ordering`, `llvm_i32_ty, // scope`, `llvm_i1_ty], // isVolatile`.

		class AMDGPUAtomicF32IntrinNORET : Intrinsic<[],
		[LLVMQualPointerType<llvm_float_ty, 3>, llvm_float_ty],
		arsenm [Unsubmitted, Not Done]: Should this have an operand added for the ordering?
		dfukalov [Author, Unsubmitted, Not Done]: No, these intrinsics are created by request to be able to generate ds_{add|min|max}[_rtn]_f32 in case of OpenCL local memory atomics only. They work only for pointers to floats located in addrspace 3.
		arsenm [Unsubmitted, Not Done]: That doesn't change the ordering. It also needs an operand for volatile.
		t-tye [Unsubmitted, Not Done]: How are the memory ordering, memory scope and volatile carried through so that those fields can be set in the Machine Memory Operand? All these properties are needed to generate the correct waitcnt in the memory legalizer (see AMDGPUUsage.rst section on memory model).
		arsenm [Unsubmitted, Done]: Those are supposed to be handled by the intrinsic callbacks I mentioned that need to be implemented (at least for volatile). I'm not sure anything really correctly considers the atomic scope for possibly atomic intrinsics. The most similar case we have is for amdgcn_atomic_inc/dec.
		[IntrArgMemOnly, NoCapture<0>]
		arsenm [Unsubmitted, Done]: This is certainly not IntrNoReturn.
		dfukalov [Author, Unsubmitted, Not Done]: You're right, I interpreted it as an intrinsic that has no return value; diff updated.
		>;
		t-tye [Unsubmitted, Done]: Need to add the same fields as for AMDGPUAtomicIncIntrin, namely: `llvm_i32_ty, // ordering`, `llvm_i32_ty, // scope`, `llvm_i1_ty], // isVolatile`.

		arsenm [Unsubmitted, Done]: These should probably be named fadd, etc., to match the IR operations.
		def int_amdgcn_ds_add_rtn_f32 : AMDGPUAtomicF32Intrin;
		def int_amdgcn_ds_min_rtn_f32 : AMDGPUAtomicF32Intrin;
		def int_amdgcn_ds_max_rtn_f32 : AMDGPUAtomicF32Intrin;
		def int_amdgcn_ds_add_f32 : AMDGPUAtomicF32IntrinNORET;
		def int_amdgcn_ds_min_f32 : AMDGPUAtomicF32IntrinNORET;
		def int_amdgcn_ds_max_f32 : AMDGPUAtomicF32IntrinNORET;

class AMDGPUImageLoad : Intrinsic <		class AMDGPUImageLoad : Intrinsic <
[llvm_anyfloat_ty], // vdata(VGPR)		[llvm_anyfloat_ty], // vdata(VGPR)
[llvm_anyint_ty, // vaddr(VGPR)		[llvm_anyint_ty, // vaddr(VGPR)
llvm_anyint_ty, // rsrc(SGPR)		llvm_anyint_ty, // rsrc(SGPR)
llvm_i32_ty, // dmask(imm)		llvm_i32_ty, // dmask(imm)
llvm_i1_ty, // glc(imm)		llvm_i1_ty, // glc(imm)
llvm_i1_ty, // slc(imm)		llvm_i1_ty, // slc(imm)
llvm_i1_ty, // lwe(imm)		llvm_i1_ty, // lwe(imm)
▲ Show 20 Lines • Show All 558 Lines • Show Last 20 Lines

lib/Target/AMDGPU/DSInstructions.td

	Show First 20 Lines • Show All 269 Lines • ▼ Show 20 Lines
	def DS_DEC_U32 : DS_1A1D_NORET<"ds_dec_u32">;			def DS_DEC_U32 : DS_1A1D_NORET<"ds_dec_u32">;
	def DS_MIN_I32 : DS_1A1D_NORET<"ds_min_i32">;			def DS_MIN_I32 : DS_1A1D_NORET<"ds_min_i32">;
	def DS_MAX_I32 : DS_1A1D_NORET<"ds_max_i32">;			def DS_MAX_I32 : DS_1A1D_NORET<"ds_max_i32">;
	def DS_MIN_U32 : DS_1A1D_NORET<"ds_min_u32">;			def DS_MIN_U32 : DS_1A1D_NORET<"ds_min_u32">;
	def DS_MAX_U32 : DS_1A1D_NORET<"ds_max_u32">;			def DS_MAX_U32 : DS_1A1D_NORET<"ds_max_u32">;
	def DS_AND_B32 : DS_1A1D_NORET<"ds_and_b32">;			def DS_AND_B32 : DS_1A1D_NORET<"ds_and_b32">;
	def DS_OR_B32 : DS_1A1D_NORET<"ds_or_b32">;			def DS_OR_B32 : DS_1A1D_NORET<"ds_or_b32">;
	def DS_XOR_B32 : DS_1A1D_NORET<"ds_xor_b32">;			def DS_XOR_B32 : DS_1A1D_NORET<"ds_xor_b32">;
	def DS_ADD_F32 : DS_1A1D_NORET<"ds_add_f32">;
	def DS_MIN_F32 : DS_1A1D_NORET<"ds_min_f32">;			def DS_MIN_F32 : DS_1A1D_NORET<"ds_min_f32">;
	def DS_MAX_F32 : DS_1A1D_NORET<"ds_max_f32">;			def DS_MAX_F32 : DS_1A1D_NORET<"ds_max_f32">;

	let mayLoad = 0 in {			let mayLoad = 0 in {
	def DS_WRITE_B8 : DS_1A1D_NORET<"ds_write_b8">;			def DS_WRITE_B8 : DS_1A1D_NORET<"ds_write_b8">;
	def DS_WRITE_B16 : DS_1A1D_NORET<"ds_write_b16">;			def DS_WRITE_B16 : DS_1A1D_NORET<"ds_write_b16">;
	def DS_WRITE_B32 : DS_1A1D_NORET<"ds_write_b32">;			def DS_WRITE_B32 : DS_1A1D_NORET<"ds_write_b32">;
	def DS_WRITE2_B32 : DS_1A2D_Off8_NORET<"ds_write2_b32">;			def DS_WRITE2_B32 : DS_1A2D_Off8_NORET<"ds_write2_b32">;
	Show All 34 Lines
	}			}
	def DS_CMPST_B64 : DS_1A2D_NORET<"ds_cmpst_b64", VReg_64>;			def DS_CMPST_B64 : DS_1A2D_NORET<"ds_cmpst_b64", VReg_64>;
	def DS_CMPST_F64 : DS_1A2D_NORET<"ds_cmpst_f64", VReg_64>;			def DS_CMPST_F64 : DS_1A2D_NORET<"ds_cmpst_f64", VReg_64>;
	def DS_MIN_F64 : DS_1A1D_NORET<"ds_min_f64", VReg_64>;			def DS_MIN_F64 : DS_1A1D_NORET<"ds_min_f64", VReg_64>;
	def DS_MAX_F64 : DS_1A1D_NORET<"ds_max_f64", VReg_64>;			def DS_MAX_F64 : DS_1A1D_NORET<"ds_max_f64", VReg_64>;

	def DS_ADD_RTN_U32 : DS_1A1D_RET<"ds_add_rtn_u32">,			def DS_ADD_RTN_U32 : DS_1A1D_RET<"ds_add_rtn_u32">,
	AtomicNoRet<"ds_add_u32", 1>;			AtomicNoRet<"ds_add_u32", 1>;
	def DS_ADD_RTN_F32 : DS_1A1D_RET<"ds_add_rtn_f32">,
	AtomicNoRet<"ds_add_f32", 1>;
	def DS_SUB_RTN_U32 : DS_1A1D_RET<"ds_sub_rtn_u32">,			def DS_SUB_RTN_U32 : DS_1A1D_RET<"ds_sub_rtn_u32">,
	AtomicNoRet<"ds_sub_u32", 1>;			AtomicNoRet<"ds_sub_u32", 1>;
	def DS_RSUB_RTN_U32 : DS_1A1D_RET<"ds_rsub_rtn_u32">,			def DS_RSUB_RTN_U32 : DS_1A1D_RET<"ds_rsub_rtn_u32">,
	AtomicNoRet<"ds_rsub_u32", 1>;			AtomicNoRet<"ds_rsub_u32", 1>;
	def DS_INC_RTN_U32 : DS_1A1D_RET<"ds_inc_rtn_u32">,			def DS_INC_RTN_U32 : DS_1A1D_RET<"ds_inc_rtn_u32">,
	AtomicNoRet<"ds_inc_u32", 1>;			AtomicNoRet<"ds_inc_u32", 1>;
	def DS_DEC_RTN_U32 : DS_1A1D_RET<"ds_dec_rtn_u32">,			def DS_DEC_RTN_U32 : DS_1A1D_RET<"ds_dec_rtn_u32">,
	AtomicNoRet<"ds_dec_u32", 1>;			AtomicNoRet<"ds_dec_u32", 1>;
	▲ Show 20 Lines • Show All 181 Lines • ▼ Show 20 Lines

	let Uses = [EXEC] in {			let Uses = [EXEC] in {
	def DS_PERMUTE_B32 : DS_1A1D_PERMUTE <"ds_permute_b32",			def DS_PERMUTE_B32 : DS_1A1D_PERMUTE <"ds_permute_b32",
	int_amdgcn_ds_permute>;			int_amdgcn_ds_permute>;
	def DS_BPERMUTE_B32 : DS_1A1D_PERMUTE <"ds_bpermute_b32",			def DS_BPERMUTE_B32 : DS_1A1D_PERMUTE <"ds_bpermute_b32",
	int_amdgcn_ds_bpermute>;			int_amdgcn_ds_bpermute>;
	}			}

				def DS_ADD_RTN_F32 : DS_1A1D_RET<"ds_add_rtn_f32">,
				AtomicNoRet<"ds_add_f32", 1>;
				def DS_ADD_F32 : DS_1A1D_NORET<"ds_add_f32">;

	} // let SubtargetPredicate = isVI			} // let SubtargetPredicate = isVI

	//===----------------------------------------------------------------------===//			//===----------------------------------------------------------------------===//
	// DS Patterns			// DS Patterns
	//===----------------------------------------------------------------------===//			//===----------------------------------------------------------------------===//

	let Predicates = [isGCN] in {			let Predicates = [isGCN] in {

	▲ Show 20 Lines • Show All 74 Lines • ▼ Show 20 Lines
	def : DSAtomicRetPat<DS_OR_RTN_B32, i32, si_atomic_load_or_local>;			def : DSAtomicRetPat<DS_OR_RTN_B32, i32, si_atomic_load_or_local>;
	def : DSAtomicRetPat<DS_XOR_RTN_B32, i32, si_atomic_load_xor_local>;			def : DSAtomicRetPat<DS_XOR_RTN_B32, i32, si_atomic_load_xor_local>;
	def : DSAtomicRetPat<DS_MIN_RTN_I32, i32, si_atomic_load_min_local>;			def : DSAtomicRetPat<DS_MIN_RTN_I32, i32, si_atomic_load_min_local>;
	def : DSAtomicRetPat<DS_MAX_RTN_I32, i32, si_atomic_load_max_local>;			def : DSAtomicRetPat<DS_MAX_RTN_I32, i32, si_atomic_load_max_local>;
	def : DSAtomicRetPat<DS_MIN_RTN_U32, i32, si_atomic_load_umin_local>;			def : DSAtomicRetPat<DS_MIN_RTN_U32, i32, si_atomic_load_umin_local>;
	def : DSAtomicRetPat<DS_MAX_RTN_U32, i32, si_atomic_load_umax_local>;			def : DSAtomicRetPat<DS_MAX_RTN_U32, i32, si_atomic_load_umax_local>;
	def : DSAtomicCmpXChg<DS_CMPST_RTN_B32, i32, si_atomic_cmp_swap_32_local>;			def : DSAtomicCmpXChg<DS_CMPST_RTN_B32, i32, si_atomic_cmp_swap_32_local>;

				// Selection patterns for the f32 DS atomic intrinsics. Instantiated via
				// defm, so NAME is the defm name (e.g. int_amdgcn_ds_add) and `op` is the
				// instruction name prefix (e.g. "DS_ADD"). Each instantiation emits two
				// anonymous patterns: one for the value-returning intrinsic and one for
				// the intrinsic without a result.
				multiclass DSAtomicPatF32<string op> {
				// Returning form: intrinsic NAME#"_rtn_f32" -> instruction op#"_RTN_F32".
				def : Pat <
				(!cast<Intrinsic>(NAME#"_rtn_f32") (DS1Addr1Offset i32:$ptr), f32:$value),
				// NOTE(review): the trailing (i16 0), (i1 0) are presumably the offset
				// and gds operands of DS_1A1D_RET -- confirm against its operand list.
				(!cast<DS_1A1D_RET>(op#"_RTN_F32") $ptr, $value, (i16 0), (i1 0))
				>;
				// Non-returning form: intrinsic NAME#"_f32" -> instruction op#"_F32".
				def : Pat <
				(!cast<Intrinsic>(NAME#"_f32") (DS1Addr1Offset i32:$ptr), f32:$value),
				(!cast<DS_1A1D_NORET>(op#"_F32") $ptr, $value, (i16 0), (i1 0))
				>;
				}
				t-tye [Unsubmitted, Done]: Also, somehow a MachineMemoryOperand needs to be created with values from the ordering, scope and isVolatile LLVM IR instruction operands.

				defm int_amdgcn_ds_add : DSAtomicPatF32<"DS_ADD">;
				defm int_amdgcn_ds_min : DSAtomicPatF32<"DS_MIN">;
				defm int_amdgcn_ds_max : DSAtomicPatF32<"DS_MAX">;

	// 64-bit atomics.			// 64-bit atomics.
	def : DSAtomicRetPat<DS_WRXCHG_RTN_B64, i64, si_atomic_swap_local>;			def : DSAtomicRetPat<DS_WRXCHG_RTN_B64, i64, si_atomic_swap_local>;
	def : DSAtomicRetPat<DS_ADD_RTN_U64, i64, si_atomic_load_add_local>;			def : DSAtomicRetPat<DS_ADD_RTN_U64, i64, si_atomic_load_add_local>;
	def : DSAtomicRetPat<DS_SUB_RTN_U64, i64, si_atomic_load_sub_local>;			def : DSAtomicRetPat<DS_SUB_RTN_U64, i64, si_atomic_load_sub_local>;
	def : DSAtomicRetPat<DS_INC_RTN_U64, i64, si_atomic_inc_local>;			def : DSAtomicRetPat<DS_INC_RTN_U64, i64, si_atomic_inc_local>;
	def : DSAtomicRetPat<DS_DEC_RTN_U64, i64, si_atomic_dec_local>;			def : DSAtomicRetPat<DS_DEC_RTN_U64, i64, si_atomic_dec_local>;
	def : DSAtomicRetPat<DS_AND_RTN_B64, i64, si_atomic_load_and_local>;			def : DSAtomicRetPat<DS_AND_RTN_B64, i64, si_atomic_load_and_local>;
	def : DSAtomicRetPat<DS_OR_RTN_B64, i64, si_atomic_load_or_local>;			def : DSAtomicRetPat<DS_OR_RTN_B64, i64, si_atomic_load_or_local>;
	▲ Show 20 Lines • Show All 366 Lines • Show Last 20 Lines

test/CodeGen/AMDGPU/llvm.amdgcn.ds.f32.ll

This file was added.

				; Verifies that the llvm.amdgcn.ds.{add,min,max}[.rtn].f32 intrinsics are
				; selected to the corresponding DS f32 instructions when compiling for fiji.
				; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=fiji -verify-machineinstrs < %s | FileCheck %s

				; Each operation has a value-returning (.rtn) form and a form without a result.
				; All of them take a pointer to float in address space 3 and a float operand.
				declare float @llvm.amdgcn.ds.add.rtn.f32(float addrspace(3)*, float) #0
				declare void @llvm.amdgcn.ds.add.f32(float addrspace(3)*, float) #0
				declare float @llvm.amdgcn.ds.min.rtn.f32(float addrspace(3)*, float) #0
				declare void @llvm.amdgcn.ds.min.f32(float addrspace(3)*, float) #0
				declare float @llvm.amdgcn.ds.max.rtn.f32(float addrspace(3)*, float) #0
				declare void @llvm.amdgcn.ds.max.f32(float addrspace(3)*, float) #0

				; The label directive below uses the default check prefix so that FileCheck
				; actually evaluates it; the expected instructions appear in call order.
				; CHECK-LABEL: {{^}}ds_f32:
				; CHECK: ds_add_f32 v{{[0-9]+}}, v{{[0-9]+}}
				; CHECK: ds_add_rtn_f32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
				; CHECK: ds_min_f32 v{{[0-9]+}}, v{{[0-9]+}}
				; CHECK: ds_min_rtn_f32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
				; CHECK: ds_max_f32 v{{[0-9]+}}, v{{[0-9]+}}
				; CHECK: ds_max_rtn_f32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
				define amdgpu_kernel void @ds_f32(float addrspace(1)* %out, float addrspace(3)* %src1, float %src2) nounwind {
				call void @llvm.amdgcn.ds.add.f32(float addrspace(3)* %src1, float %src2)
				%res = call float @llvm.amdgcn.ds.add.rtn.f32(float addrspace(3)* %src1, float %src2)
				call void @llvm.amdgcn.ds.min.f32(float addrspace(3)* %src1, float %res)
				%res2 = call float @llvm.amdgcn.ds.min.rtn.f32(float addrspace(3)* %src1, float %res)
				call void @llvm.amdgcn.ds.max.f32(float addrspace(3)* %src1, float %res2)
				%res3 = call float @llvm.amdgcn.ds.max.rtn.f32(float addrspace(3)* %src1, float %res2)
				; Store the last returned value to %out so the results of the chain are used.
				store float %res3, float addrspace(1)* %out, align 4
				ret void
				}

				attributes #0 = { nounwind argmemonly }

This is an archive of the discontinued LLVM Phabricator instance.

[AMDGPU] add LDS f32 intrinsics
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 115694

include/llvm/IR/IntrinsicsAMDGPU.td

lib/Target/AMDGPU/DSInstructions.td

test/CodeGen/AMDGPU/llvm.amdgcn.ds.f32.ll

This is an archive of the discontinued LLVM Phabricator instance.

[AMDGPU] add LDS f32 intrinsics
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 115694

include/llvm/IR/IntrinsicsAMDGPU.td

lib/Target/AMDGPU/DSInstructions.td

test/CodeGen/AMDGPU/llvm.amdgcn.ds.f32.ll

[AMDGPU] add LDS f32 intrinsics
ClosedPublic