This is an archive of the discontinued LLVM Phabricator instance.

include/llvm/IR/IntrinsicsAMDGPU.td
364 ↗	(On Diff #152788)	It does. It cannot preserve denormals. It is even lowered into FMAD_FTZ, which is lowered to mad. The new part is this intrinsic sitting above the existing SDNode.

arsenm added inline comments. Jun 26 2018, 1:47 AM

include/llvm/IR/IntrinsicsAMDGPU.td
364 ↗	(On Diff #152788)	In that case, I would make this intrinsic type-mangled and make it work for f16 as well.

rampitec added inline comments. Jun 26 2018, 7:19 AM

include/llvm/IR/IntrinsicsAMDGPU.td
364 ↗	(On Diff #152788)	AFAIR f16 does not flush.

rampitec added inline comments. Jun 26 2018, 7:51 AM

include/llvm/IR/IntrinsicsAMDGPU.td
364 ↗	(On Diff #152788)	Sorry for the confusion: v_mad_f32 flushes denormals; v_mad_f16 does not. That is why the intrinsic is not overloaded.

LGTM. A test showing the source modifiers are matched wouldn't hurt

This revision is now accepted and ready to land. Jun 26 2018, 12:17 PM

Added tests with source modifiers.

Closed by commit rL335654: [AMDGPU] Add llvm.amdgcn.fmad.ftz intrinsic (authored by rampitec). · Explain Why · Jun 26 2018, 1:09 PM

This revision was automatically updated to reflect the committed changes.

Actually, according to the selection code, f16 mad does not support denormals either, so the intrinsic should also work with f16 if that is correct.

In D48573#1144712, @arsenm wrote:

Actually, according to the selection code, f16 mad does not support denormals either, so the intrinsic should also work with f16 if that is correct.

https://reviews.llvm.org/D48677

Perhaps fixes are needed.

include/llvm/IR/IntrinsicsAMDGPU.td
364 ↗	(On Diff #152788)	According to SCDevUtil/SCMathengine, V_MAD_F16 always flushes HP denormals. Please double-check.

This revision is now accepted and ready to land. Jul 5 2018, 10:17 AM

This was extended to f16 in r335866

Revision Contents

Path

Size

llvm/

trunk/

include/

llvm/

IR/

IntrinsicsAMDGPU.td

6 lines

lib/

Target/

AMDGPU/

SIISelLowering.cpp

3 lines

test/

CodeGen/

AMDGPU/

llvm.amdgcn.fmad.ftz.ll

114 lines

Diff 152947

llvm/trunk/include/llvm/IR/IntrinsicsAMDGPU.td

	Show First 20 Lines • Show All 354 Lines • ▼ Show 20 Lines

	// v_ffbh_i32, as opposed to v_ffbh_u32. For v_ffbh_u32, llvm.ctlz			// v_ffbh_i32, as opposed to v_ffbh_u32. For v_ffbh_u32, llvm.ctlz
	// should be used.			// should be used.
	def int_amdgcn_sffbh :			def int_amdgcn_sffbh :
	Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>],			Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>],
	[IntrNoMem, IntrSpeculatable]			[IntrNoMem, IntrSpeculatable]
	>;			>;

				// v_mad_f32/v_mac_f32, selected regardless of denorm support.
				def int_amdgcn_fmad_ftz :
				Intrinsic<[llvm_float_ty],
				[llvm_float_ty, llvm_float_ty, llvm_float_ty],
				[IntrNoMem, IntrSpeculatable]
				>;

	// Fields should mirror atomicrmw			// Fields should mirror atomicrmw
	class AMDGPUAtomicIncIntrin : Intrinsic<[llvm_anyint_ty],			class AMDGPUAtomicIncIntrin : Intrinsic<[llvm_anyint_ty],
	[llvm_anyptr_ty,			[llvm_anyptr_ty,
	LLVMMatchType<0>,			LLVMMatchType<0>,
	llvm_i32_ty, // ordering			llvm_i32_ty, // ordering
	llvm_i32_ty, // scope			llvm_i32_ty, // scope
	llvm_i1_ty], // isVolatile			llvm_i1_ty], // isVolatile
	▲ Show 20 Lines • Show All 957 Lines • Show Last 20 Lines

llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 4,916 Lines • ▼ Show 20 Lines	case Intrinsic::amdgcn_wqm: {
return SDValue(DAG.getMachineNode(AMDGPU::WQM, DL, Src.getValueType(), Src),		return SDValue(DAG.getMachineNode(AMDGPU::WQM, DL, Src.getValueType(), Src),
0);		0);
}		}
case Intrinsic::amdgcn_wwm: {		case Intrinsic::amdgcn_wwm: {
SDValue Src = Op.getOperand(1);		SDValue Src = Op.getOperand(1);
return SDValue(DAG.getMachineNode(AMDGPU::WWM, DL, Src.getValueType(), Src),		return SDValue(DAG.getMachineNode(AMDGPU::WWM, DL, Src.getValueType(), Src),
0);		0);
}		}
		case Intrinsic::amdgcn_fmad_ftz:
		return DAG.getNode(AMDGPUISD::FMAD_FTZ, DL, VT, Op.getOperand(1),
		Op.getOperand(2), Op.getOperand(3));
default:		default:
if (const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr =		if (const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr =
AMDGPU::getImageDimIntrinsicInfo(IntrinsicID))		AMDGPU::getImageDimIntrinsicInfo(IntrinsicID))
return lowerImage(Op, ImageDimIntr, DAG);		return lowerImage(Op, ImageDimIntr, DAG);

return Op;		return Op;
}		}
}		}
▲ Show 20 Lines • Show All 3,339 Lines • Show Last 20 Lines

llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.fmad.ftz.ll

				; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
				; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
				; RUN: llc -march=amdgcn -mcpu=tonga -mattr=+fp32-denormals -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
				; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=+fp32-denormals -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s

				declare float @llvm.amdgcn.fmad.ftz(float %a, float %b, float %c)

				; GCN-LABEL: {{^}}mad_f32:
				; GCN: v_ma{{[dc]}}_f32
				define amdgpu_kernel void @mad_f32(
				float addrspace(1)* %r,
				float addrspace(1)* %a,
				float addrspace(1)* %b,
				float addrspace(1)* %c) {
				%a.val = load float, float addrspace(1)* %a
				%b.val = load float, float addrspace(1)* %b
				%c.val = load float, float addrspace(1)* %c
				%r.val = call float @llvm.amdgcn.fmad.ftz(float %a.val, float %b.val, float %c.val)
				store float %r.val, float addrspace(1)* %r
				ret void
				}

				; GCN-LABEL: {{^}}mad_f32_imm_a:
				; GCN: v_mov_b32_e32 [[KA:v[0-9]+]], 0x41000000
				; GCN: v_ma{{[dc]}}_f32 {{v[0-9]+}}, [[KA]],
				define amdgpu_kernel void @mad_f32_imm_a(
				float addrspace(1)* %r,
				float addrspace(1)* %b,
				float addrspace(1)* %c) {
				%b.val = load float, float addrspace(1)* %b
				%c.val = load float, float addrspace(1)* %c
				%r.val = call float @llvm.amdgcn.fmad.ftz(float 8.0, float %b.val, float %c.val)
				store float %r.val, float addrspace(1)* %r
				ret void
				}

				; GCN-LABEL: {{^}}mad_f32_imm_b:
				; GCN: v_mov_b32_e32 [[KB:v[0-9]+]], 0x41000000
				; GCN: v_ma{{[dc]}}_f32 {{v[0-9]+}}, {{[vs][0-9]+}}, [[KB]],
				define amdgpu_kernel void @mad_f32_imm_b(
				float addrspace(1)* %r,
				float addrspace(1)* %a,
				float addrspace(1)* %c) {
				%a.val = load float, float addrspace(1)* %a
				%c.val = load float, float addrspace(1)* %c
				%r.val = call float @llvm.amdgcn.fmad.ftz(float %a.val, float 8.0, float %c.val)
				store float %r.val, float addrspace(1)* %r
				ret void
				}

				; GCN-LABEL: {{^}}mad_f32_imm_c:
				; GCN: v_mov_b32_e32 [[KC:v[0-9]+]], 0x41000000
				; GCN: v_ma{{[dc]}}_f32 {{v[0-9]+}}, {{[vs][0-9]+}}, {{v[0-9]+}}, [[KC]]{{$}}
				define amdgpu_kernel void @mad_f32_imm_c(
				float addrspace(1)* %r,
				float addrspace(1)* %a,
				float addrspace(1)* %b) {
				%a.val = load float, float addrspace(1)* %a
				%b.val = load float, float addrspace(1)* %b
				%r.val = call float @llvm.amdgcn.fmad.ftz(float %a.val, float %b.val, float 8.0)
				store float %r.val, float addrspace(1)* %r
				ret void
				}

				; GCN-LABEL: {{^}}mad_f32_neg_b:
				; GCN: v_mad_f32 v{{[0-9]+}}, s{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}
				define amdgpu_kernel void @mad_f32_neg_b(
				float addrspace(1)* %r,
				float addrspace(1)* %a,
				float addrspace(1)* %b,
				float addrspace(1)* %c) {
				%a.val = load float, float addrspace(1)* %a
				%b.val = load float, float addrspace(1)* %b
				%c.val = load float, float addrspace(1)* %c
				%neg.b = fsub float -0.0, %b.val
				%r.val = call float @llvm.amdgcn.fmad.ftz(float %a.val, float %neg.b, float %c.val)
				store float %r.val, float addrspace(1)* %r
				ret void
				}

				; GCN-LABEL: {{^}}mad_f32_abs_b:
				; GCN: v_mad_f32 v{{[0-9]+}}, s{{[0-9]+}}, |v{{[0-9]+}}|, v{{[0-9]+}}
				define amdgpu_kernel void @mad_f32_abs_b(
				float addrspace(1)* %r,
				float addrspace(1)* %a,
				float addrspace(1)* %b,
				float addrspace(1)* %c) {
				%a.val = load float, float addrspace(1)* %a
				%b.val = load float, float addrspace(1)* %b
				%c.val = load float, float addrspace(1)* %c
				%abs.b = call float @llvm.fabs.f32(float %b.val)
				%r.val = call float @llvm.amdgcn.fmad.ftz(float %a.val, float %abs.b, float %c.val)
				store float %r.val, float addrspace(1)* %r
				ret void
				}

				; GCN-LABEL: {{^}}mad_f32_neg_abs_b:
				; GCN: v_mad_f32 v{{[0-9]+}}, s{{[0-9]+}}, -|v{{[0-9]+}}|, v{{[0-9]+}}
				define amdgpu_kernel void @mad_f32_neg_abs_b(
				float addrspace(1)* %r,
				float addrspace(1)* %a,
				float addrspace(1)* %b,
				float addrspace(1)* %c) {
				%a.val = load float, float addrspace(1)* %a
				%b.val = load float, float addrspace(1)* %b
				%c.val = load float, float addrspace(1)* %c
				%abs.b = call float @llvm.fabs.f32(float %b.val)
				%neg.abs.b = fsub float -0.0, %abs.b
				%r.val = call float @llvm.amdgcn.fmad.ftz(float %a.val, float %neg.abs.b, float %c.val)
				store float %r.val, float addrspace(1)* %r
				ret void
				}

				declare float @llvm.fabs.f32(float)

This is an archive of the discontinued LLVM Phabricator instance.

[AMDGPU] Add llvm.amdgcn.fmad.ftz intrinsic (Closed, Public)

Details

Diff Detail

Event Timeline

Revision Contents

Diff 152947

llvm/trunk/include/llvm/IR/IntrinsicsAMDGPU.td

llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp

llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.fmad.ftz.ll

[AMDGPU] Add llvm.amdgcn.fmad.ftz intrinsic
ClosedPublic