This is an archive of the discontinued LLVM Phabricator instance.

AMDGPU: keep track of modifiers when converting v_mac to v_mad
ClosedPublic

Authored by hakzsam on Mar 7 2017, 1:38 PM.

Download Raw Diff

Details

Reviewers

nhaehnle
arsenm
mareko

Summary

Since v_max_f32_e64/v_max_f16_e64 can be folded if the target
instruction supports the clamp bit, we also need to maintain
modifiers when converting v_mac to v_mad.

This fixes a rendering issue with Dirt Rally because a v_mac
instruction with the clamp bit set was converted to a v_mad
but that bit was lost during the conversion.

Fixes: e184e01dd79 ("AMDGPU: Fold FP clamp as modifier bit")

Diff Detail

Event Timeline

hakzsam created this revision. Mar 7 2017, 1:39 PM

Herald added subscribers: tpr, dstuttard, tony-tye and 3 others. · View Herald Transcript · Mar 7 2017, 1:39 PM

Needs tests

lib/Target/AMDGPU/SIInstrInfo.cpp
1779	hasModifiersSet won't preserve the value, you need to keep the whole operand. These aren't simple booleans.
1785–1786	should just add the value. omod is not a simple boolean so that is also broken

arsenm added inline comments. Mar 7 2017, 3:45 PM

lib/Target/AMDGPU/SIInstrInfo.cpp
1783	This is also unnecessary, there is no src2_modifiers for mac so this can stay add 0

v2: - preserve value by using getNamedOperand()->getImm() instead

add v_clamp_mac_to_mad test in clamp-modifier.ll
add v_omod_mac_to_mad test in omod.ll

LGTM

This revision is now accepted and ready to land. Mar 10 2017, 11:29 AM

r297556

Revision Contents

Path

Size

lib/

Target/

AMDGPU/

SIInstrInfo.cpp

14 lines

test/

CodeGen/

AMDGPU/

clamp-modifier.ll

17 lines

omod.ll

11 lines

Diff 91379

lib/Target/AMDGPU/SIInstrInfo.cpp

Context not available.

	const MachineOperand *Dst = getNamedOperand(MI, AMDGPU::OpName::vdst);	const MachineOperand *Dst = getNamedOperand(MI, AMDGPU::OpName::vdst);
	const MachineOperand *Src0 = getNamedOperand(MI, AMDGPU::OpName::src0);	const MachineOperand *Src0 = getNamedOperand(MI, AMDGPU::OpName::src0);
		const MachineOperand *Src0Mods =
		getNamedOperand(MI, AMDGPU::OpName::src0_modifiers);
	const MachineOperand *Src1 = getNamedOperand(MI, AMDGPU::OpName::src1);	const MachineOperand *Src1 = getNamedOperand(MI, AMDGPU::OpName::src1);
		const MachineOperand *Src1Mods =
		getNamedOperand(MI, AMDGPU::OpName::src1_modifiers);
	const MachineOperand *Src2 = getNamedOperand(MI, AMDGPU::OpName::src2);	const MachineOperand *Src2 = getNamedOperand(MI, AMDGPU::OpName::src2);
		const MachineOperand *Clamp = getNamedOperand(MI, AMDGPU::OpName::clamp);
		const MachineOperand *Omod = getNamedOperand(MI, AMDGPU::OpName::omod);

	return BuildMI(*MBB, MI, MI.getDebugLoc(),	return BuildMI(*MBB, MI, MI.getDebugLoc(),
	get(IsF16 ? AMDGPU::V_MAD_F16 : AMDGPU::V_MAD_F32))	get(IsF16 ? AMDGPU::V_MAD_F16 : AMDGPU::V_MAD_F32))
		[Inline comment — arsenm, not done] hasModifiersSet won't preserve the value, you need to keep the whole operand. These aren't simple booleans.
	.add(*Dst)	.add(*Dst)
	.addImm(0) // Src0 mods	.addImm(Src0Mods ? Src0Mods->getImm() : 0)
	.add(*Src0)	.add(*Src0)
	.addImm(0) // Src1 mods	.addImm(Src1Mods ? Src1Mods->getImm() : 0)
		[Inline comment — arsenm, not done] This is also unnecessary, there is no src2_modifiers for mac so this can stay add 0
	.add(*Src1)	.add(*Src1)
	.addImm(0) // Src mods	.addImm(0) // Src mods
	.add(*Src2)	.add(*Src2)
		[Inline comment — arsenm, not done] should just add the value. omod is not a simple boolean so that is also broken
	.addImm(0) // clamp	.addImm(Clamp ? Clamp->getImm() : 0)
	.addImm(0); // omod	.addImm(Omod ? Omod->getImm() : 0);
	}	}

	// It's not generally safe to move VALU instructions across these since it will	// It's not generally safe to move VALU instructions across these since it will
Context not available.

test/CodeGen/AMDGPU/clamp-modifier.ll

Context not available.
	ret void	ret void
	}	}

		; GCN-LABEL: {{^}}v_clamp_mac_to_mad:
		; GCN: v_mad_f32 v{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]}} clamp{{$}}
		define amdgpu_kernel void @v_clamp_mac_to_mad(float addrspace(1)* %out, float addrspace(1)* %aptr, float %a) #0 {
		%tid = call i32 @llvm.amdgcn.workitem.id.x()
		%gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
		%out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
		%b = load float, float addrspace(1)* %gep0

		%mul = fmul float %a, %a
		%add = fadd float %mul, %b
		%max = call float @llvm.maxnum.f32(float %add, float 0.0)
		%clamp = call float @llvm.minnum.f32(float %max, float 1.0)
		%res = fadd float %clamp, %b
		store float %res, float addrspace(1)* %out.gep
		ret void
		}

	declare i32 @llvm.amdgcn.workitem.id.x() #1	declare i32 @llvm.amdgcn.workitem.id.x() #1
	declare float @llvm.fabs.f32(float) #1	declare float @llvm.fabs.f32(float) #1
	declare float @llvm.floor.f32(float) #1	declare float @llvm.floor.f32(float) #1
Context not available.

test/CodeGen/AMDGPU/omod.ll

Context not available.
	ret void	ret void
	}	}

		; GCN-LABEL: {{^}}v_omod_mac_to_mad:
		; GCN: v_mad_f32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]}} mul:2{{$}}
		define amdgpu_ps void @v_omod_mac_to_mad(float %b, float %a) #0 {
		%mul = fmul float %a, %a
		%add = fadd float %mul, %b
		%mad = fmul float %add, 2.0
		%res = fmul float %mad, %b
		store float %res, float addrspace(1)* undef
		ret void
		}

	declare i32 @llvm.amdgcn.workitem.id.x() #1	declare i32 @llvm.amdgcn.workitem.id.x() #1
	declare float @llvm.fabs.f32(float) #1	declare float @llvm.fabs.f32(float) #1
	declare float @llvm.floor.f32(float) #1	declare float @llvm.floor.f32(float) #1
Context not available.