This is an archive of the discontinued LLVM Phabricator instance.

include/llvm/IR/IntrinsicsAMDGPU.td
181	Could we add a LLVMMatchType to get an int the same width as the FP type to avoid a second mangled parameter? Also could we just leave it as i32? I think it just zeroes the high 16-bits anyway

kzhuravl added inline comments. Nov 10 2016, 9:47 AM

include/llvm/IR/IntrinsicsAMDGPU.td
181	Hi Matt, I do not think I understand your suggestion entirely (LLVMMatchType part of it), can you explain it a bit more? We could leave i32 in the return type of the intrinsic and change `V_FREXP_EXP_I16_F16` to `VOP_I32_F16` and higher 16 bits will be zeroed.

arsenm added inline comments. Nov 11 2016, 10:06 AM

include/llvm/IR/IntrinsicsAMDGPU.td
181	I think we would need to add another one that would be simple to implement in TableGen, something like a LLVMMatchIntFPBitWidth<>. However, we don't actually want that because in the f64 case it's still i32 (which is also part of why I don't understand why this would have been changed to return i16)

Included in D25975

kzhuravl mentioned this in D25975: AMDGPU/SI: Make f16 a legal type for VI subtargets. Nov 11 2016, 12:34 PM

Revision Contents

Path

Size

include/

llvm/

IR/

IntrinsicsAMDGPU.td

2 lines

lib/

Target/

AMDGPU/

VOP1Instructions.td

6 lines

test/

CodeGen/

AMDGPU/

llvm.amdgcn.frexp.exp.f16.ll

15 lines

llvm.amdgcn.frexp.exp.ll

16 lines

Diff 77401

include/llvm/IR/IntrinsicsAMDGPU.td

Show First 20 Lines • Show All 172 Lines • ▼ Show 20 Lines	def int_amdgcn_ldexp : Intrinsic<
[llvm_anyfloat_ty], [LLVMMatchType<0>, llvm_i32_ty], [IntrNoMem]		[llvm_anyfloat_ty], [LLVMMatchType<0>, llvm_i32_ty], [IntrNoMem]
>;		>;

def int_amdgcn_frexp_mant : Intrinsic<		def int_amdgcn_frexp_mant : Intrinsic<
[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]		[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]
>;		>;

def int_amdgcn_frexp_exp : Intrinsic<		def int_amdgcn_frexp_exp : Intrinsic<
[llvm_i32_ty], [llvm_anyfloat_ty], [IntrNoMem]		[llvm_anyint_ty], [llvm_anyfloat_ty], [IntrNoMem]
		arsenmUnsubmitted Not Done Reply Inline Actions Could we add a LLVMMatchType to get an int the same width as the FP type to avoid a second mangled parameter? Also could we just leave it as i32? I think it just zeroes the high 16-bits anyway arsenm: Could we add a LLVMMatchType to get an int the same width as the FP type to avoid a second…
		kzhuravlAuthorUnsubmitted Not Done Reply Inline Actions Hi Matt, I do not think I understand your suggestion entirely (LLVMMatchType part of it), can you explain it a bit more? We could leave i32 in the return type of the intrinsic and change `V_FREXP_EXP_I16_F16` to `VOP_I32_F16` and higher 16 bits will be zeroed. kzhuravl: Hi Matt, I do not think I understand your suggestion entirely (LLVMMatchType part of it), can…
		arsenmUnsubmitted Not Done Reply Inline Actions I think we would need to add another one that would be simple to implement in TableGen, something like a LLVMMatchIntFPBitWidth<>. However, we don't actually want that because in the f64 case it's still i32 (which is also part of why I don't understand why this would have been changed to return i16) arsenm: I think we would need to add another one that would be simple to implement in TableGen…
>;		>;

// v_fract is buggy on SI/CI. It mishandles infinities, may return 1.0		// v_fract is buggy on SI/CI. It mishandles infinities, may return 1.0
// and always uses rtz, so is not suitable for implementing the OpenCL		// and always uses rtz, so is not suitable for implementing the OpenCL
// fract function. It should be ok on VI.		// fract function. It should be ok on VI.
def int_amdgcn_fract : Intrinsic<		def int_amdgcn_fract : Intrinsic<
[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]		[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]
>;		>;
▲ Show 20 Lines • Show All 417 Lines • Show Last 20 Lines

lib/Target/AMDGPU/VOP1Instructions.td

	Show First 20 Lines • Show All 284 Lines • ▼ Show 20 Lines
	defm V_CVT_U16_F16 : VOP1Inst <"v_cvt_u16_f16", VOP_I16_F16, fp_to_uint>;			defm V_CVT_U16_F16 : VOP1Inst <"v_cvt_u16_f16", VOP_I16_F16, fp_to_uint>;
	defm V_CVT_I16_F16 : VOP1Inst <"v_cvt_i16_f16", VOP_I16_F16, fp_to_sint>;			defm V_CVT_I16_F16 : VOP1Inst <"v_cvt_i16_f16", VOP_I16_F16, fp_to_sint>;
	defm V_RCP_F16 : VOP1Inst <"v_rcp_f16", VOP_F16_F16, AMDGPUrcp>;			defm V_RCP_F16 : VOP1Inst <"v_rcp_f16", VOP_F16_F16, AMDGPUrcp>;
	defm V_SQRT_F16 : VOP1Inst <"v_sqrt_f16", VOP_F16_F16, fsqrt>;			defm V_SQRT_F16 : VOP1Inst <"v_sqrt_f16", VOP_F16_F16, fsqrt>;
	defm V_RSQ_F16 : VOP1Inst <"v_rsq_f16", VOP_F16_F16, AMDGPUrsq>;			defm V_RSQ_F16 : VOP1Inst <"v_rsq_f16", VOP_F16_F16, AMDGPUrsq>;
	defm V_LOG_F16 : VOP1Inst <"v_log_f16", VOP_F16_F16, flog2>;			defm V_LOG_F16 : VOP1Inst <"v_log_f16", VOP_F16_F16, flog2>;
	defm V_EXP_F16 : VOP1Inst <"v_exp_f16", VOP_F16_F16, fexp2>;			defm V_EXP_F16 : VOP1Inst <"v_exp_f16", VOP_F16_F16, fexp2>;
	defm V_FREXP_MANT_F16 : VOP1Inst <"v_frexp_mant_f16", VOP_F16_F16, int_amdgcn_frexp_mant>;			defm V_FREXP_MANT_F16 : VOP1Inst <"v_frexp_mant_f16", VOP_F16_F16, int_amdgcn_frexp_mant>;
				defm V_FREXP_EXP_I16_F16 : VOP1Inst <"v_frexp_exp_i16_f16", VOP_I16_F16, int_amdgcn_frexp_exp>;
	// FIXME: V_FREXP_EXP_I16_F16 requires a change to llvm.amdgcn.frexp.exp
	// intrinsic.
	defm V_FREXP_EXP_I16_F16 : VOP1Inst <"v_frexp_exp_i16_f16", VOP_I16_F16/*, int_amdgcn_frexp_exp*/>;

	defm V_FLOOR_F16 : VOP1Inst <"v_floor_f16", VOP_F16_F16, ffloor>;			defm V_FLOOR_F16 : VOP1Inst <"v_floor_f16", VOP_F16_F16, ffloor>;
	defm V_CEIL_F16 : VOP1Inst <"v_ceil_f16", VOP_F16_F16, fceil>;			defm V_CEIL_F16 : VOP1Inst <"v_ceil_f16", VOP_F16_F16, fceil>;
	defm V_TRUNC_F16 : VOP1Inst <"v_trunc_f16", VOP_F16_F16, ftrunc>;			defm V_TRUNC_F16 : VOP1Inst <"v_trunc_f16", VOP_F16_F16, ftrunc>;
	defm V_RNDNE_F16 : VOP1Inst <"v_rndne_f16", VOP_F16_F16, frint>;			defm V_RNDNE_F16 : VOP1Inst <"v_rndne_f16", VOP_F16_F16, frint>;
	defm V_FRACT_F16 : VOP1Inst <"v_fract_f16", VOP_F16_F16, AMDGPUfract>;			defm V_FRACT_F16 : VOP1Inst <"v_fract_f16", VOP_F16_F16, AMDGPUfract>;
	defm V_SIN_F16 : VOP1Inst <"v_sin_f16", VOP_F16_F16, AMDGPUsin>;			defm V_SIN_F16 : VOP1Inst <"v_sin_f16", VOP_F16_F16, AMDGPUsin>;
	defm V_COS_F16 : VOP1Inst <"v_cos_f16", VOP_F16_F16, AMDGPUcos>;			defm V_COS_F16 : VOP1Inst <"v_cos_f16", VOP_F16_F16, AMDGPUcos>;

	▲ Show 20 Lines • Show All 312 Lines • Show Last 20 Lines

test/CodeGen/AMDGPU/llvm.amdgcn.frexp.exp.f16.ll

This file was added.

				; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s

				declare i16 @llvm.amdgcn.frexp.exp.i16.f16(half %a)

				; GCN-LABEL: {{^}}test_simple_vt_frexp_exp
				; GCN: buffer_load_ushort v[[A_F16:[0-9]+]]
				; VI: v_frexp_exp_i16_f16_e32 v[[R_I16:[0-9]+]], v[[A_F16]]
				; GCN: buffer_store_short v[[R_I16]]
				define void @test_simple_vt_frexp_exp(i16 addrspace(1)* %r,
				half addrspace(1)* %a) {
				%a.val = load half, half addrspace(1)* %a
				%r.val = call i16 @llvm.amdgcn.frexp.exp.i16.f16(half %a.val)
				store i16 %r.val, i16 addrspace(1)* %r
				ret void
				}

test/CodeGen/AMDGPU/llvm.amdgcn.frexp.exp.ll

	; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s			; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
	; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s			; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s

	declare float @llvm.fabs.f32(float) #0			declare float @llvm.fabs.f32(float) #0
	declare double @llvm.fabs.f64(double) #0			declare double @llvm.fabs.f64(double) #0
	declare i32 @llvm.amdgcn.frexp.exp.f32(float) #0			declare i32 @llvm.amdgcn.frexp.exp.i32.f32(float) #0
	declare i32 @llvm.amdgcn.frexp.exp.f64(double) #0			declare i32 @llvm.amdgcn.frexp.exp.i32.f64(double) #0

	; GCN-LABEL: {{^}}s_test_frexp_exp_f32:			; GCN-LABEL: {{^}}s_test_frexp_exp_f32:
	; GCN: v_frexp_exp_i32_f32_e32 {{v[0-9]+}}, {{s[0-9]+}}			; GCN: v_frexp_exp_i32_f32_e32 {{v[0-9]+}}, {{s[0-9]+}}
	define void @s_test_frexp_exp_f32(i32 addrspace(1)* %out, float %src) #1 {			define void @s_test_frexp_exp_f32(i32 addrspace(1)* %out, float %src) #1 {
	%frexp.exp = call i32 @llvm.amdgcn.frexp.exp.f32(float %src)			%frexp.exp = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float %src)
	store i32 %frexp.exp, i32 addrspace(1)* %out			store i32 %frexp.exp, i32 addrspace(1)* %out
	ret void			ret void
	}			}

	; GCN-LABEL: {{^}}s_test_fabs_frexp_exp_f32:			; GCN-LABEL: {{^}}s_test_fabs_frexp_exp_f32:
	; GCN: v_frexp_exp_i32_f32_e64 {{v[0-9]+}}, |{{s[0-9]+}}|			; GCN: v_frexp_exp_i32_f32_e64 {{v[0-9]+}}, |{{s[0-9]+}}|
	define void @s_test_fabs_frexp_exp_f32(i32 addrspace(1)* %out, float %src) #1 {			define void @s_test_fabs_frexp_exp_f32(i32 addrspace(1)* %out, float %src) #1 {
	%fabs.src = call float @llvm.fabs.f32(float %src)			%fabs.src = call float @llvm.fabs.f32(float %src)
	%frexp.exp = call i32 @llvm.amdgcn.frexp.exp.f32(float %fabs.src)			%frexp.exp = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float %fabs.src)
	store i32 %frexp.exp, i32 addrspace(1)* %out			store i32 %frexp.exp, i32 addrspace(1)* %out
	ret void			ret void
	}			}

	; GCN-LABEL: {{^}}s_test_fneg_fabs_frexp_exp_f32:			; GCN-LABEL: {{^}}s_test_fneg_fabs_frexp_exp_f32:
	; GCN: v_frexp_exp_i32_f32_e64 {{v[0-9]+}}, -|{{s[0-9]+}}|			; GCN: v_frexp_exp_i32_f32_e64 {{v[0-9]+}}, -|{{s[0-9]+}}|
	define void @s_test_fneg_fabs_frexp_exp_f32(i32 addrspace(1)* %out, float %src) #1 {			define void @s_test_fneg_fabs_frexp_exp_f32(i32 addrspace(1)* %out, float %src) #1 {
	%fabs.src = call float @llvm.fabs.f32(float %src)			%fabs.src = call float @llvm.fabs.f32(float %src)
	%fneg.fabs.src = fsub float -0.0, %fabs.src			%fneg.fabs.src = fsub float -0.0, %fabs.src
	%frexp.exp = call i32 @llvm.amdgcn.frexp.exp.f32(float %fneg.fabs.src)			%frexp.exp = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float %fneg.fabs.src)
	store i32 %frexp.exp, i32 addrspace(1)* %out			store i32 %frexp.exp, i32 addrspace(1)* %out
	ret void			ret void
	}			}

	; GCN-LABEL: {{^}}s_test_frexp_exp_f64:			; GCN-LABEL: {{^}}s_test_frexp_exp_f64:
	; GCN: v_frexp_exp_i32_f64_e32 {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}			; GCN: v_frexp_exp_i32_f64_e32 {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}
	define void @s_test_frexp_exp_f64(i32 addrspace(1)* %out, double %src) #1 {			define void @s_test_frexp_exp_f64(i32 addrspace(1)* %out, double %src) #1 {
	%frexp.exp = call i32 @llvm.amdgcn.frexp.exp.f64(double %src)			%frexp.exp = call i32 @llvm.amdgcn.frexp.exp.i32.f64(double %src)
	store i32 %frexp.exp, i32 addrspace(1)* %out			store i32 %frexp.exp, i32 addrspace(1)* %out
	ret void			ret void
	}			}

	; GCN-LABEL: {{^}}s_test_fabs_frexp_exp_f64:			; GCN-LABEL: {{^}}s_test_fabs_frexp_exp_f64:
	; GCN: v_frexp_exp_i32_f64_e64 {{v[0-9]+}}, |{{s\[[0-9]+:[0-9]+\]}}|			; GCN: v_frexp_exp_i32_f64_e64 {{v[0-9]+}}, |{{s\[[0-9]+:[0-9]+\]}}|
	define void @s_test_fabs_frexp_exp_f64(i32 addrspace(1)* %out, double %src) #1 {			define void @s_test_fabs_frexp_exp_f64(i32 addrspace(1)* %out, double %src) #1 {
	%fabs.src = call double @llvm.fabs.f64(double %src)			%fabs.src = call double @llvm.fabs.f64(double %src)
	%frexp.exp = call i32 @llvm.amdgcn.frexp.exp.f64(double %fabs.src)			%frexp.exp = call i32 @llvm.amdgcn.frexp.exp.i32.f64(double %fabs.src)
	store i32 %frexp.exp, i32 addrspace(1)* %out			store i32 %frexp.exp, i32 addrspace(1)* %out
	ret void			ret void
	}			}

	; GCN-LABEL: {{^}}s_test_fneg_fabs_frexp_exp_f64:			; GCN-LABEL: {{^}}s_test_fneg_fabs_frexp_exp_f64:
	; GCN: v_frexp_exp_i32_f64_e64 {{v[0-9]+}}, -|{{s\[[0-9]+:[0-9]+\]}}|			; GCN: v_frexp_exp_i32_f64_e64 {{v[0-9]+}}, -|{{s\[[0-9]+:[0-9]+\]}}|
	define void @s_test_fneg_fabs_frexp_exp_f64(i32 addrspace(1)* %out, double %src) #1 {			define void @s_test_fneg_fabs_frexp_exp_f64(i32 addrspace(1)* %out, double %src) #1 {
	%fabs.src = call double @llvm.fabs.f64(double %src)			%fabs.src = call double @llvm.fabs.f64(double %src)
	%fneg.fabs.src = fsub double -0.0, %fabs.src			%fneg.fabs.src = fsub double -0.0, %fabs.src
	%frexp.exp = call i32 @llvm.amdgcn.frexp.exp.f64(double %fneg.fabs.src)			%frexp.exp = call i32 @llvm.amdgcn.frexp.exp.i32.f64(double %fneg.fabs.src)
	store i32 %frexp.exp, i32 addrspace(1)* %out			store i32 %frexp.exp, i32 addrspace(1)* %out
	ret void			ret void
	}			}

	attributes #0 = { nounwind readnone }			attributes #0 = { nounwind readnone }
	attributes #1 = { nounwind }			attributes #1 = { nounwind }

This is an archive of the discontinued LLVM Phabricator instance.

[AMDGPU] Update llvm.amdgcn.frexp.exp intrinsic and lower it to v_frexp_exp_i16_f16 instruction (Abandoned, Public)

Details

Diff Detail

Event Timeline

Revision Contents

Diff 77401

include/llvm/IR/IntrinsicsAMDGPU.td

lib/Target/AMDGPU/VOP1Instructions.td

test/CodeGen/AMDGPU/llvm.amdgcn.frexp.exp.f16.ll

test/CodeGen/AMDGPU/llvm.amdgcn.frexp.exp.ll

[AMDGPU] Update llvm.amdgcn.frexp.exp intrinsic and lower it to v_frexp_exp_i16_f16 instruction
AbandonedPublic