This is an archive of the discontinued LLVM Phabricator instance.

AMDGPU: Fold fneg into fminnum/fmaxnum
ClosedPublic

Authored by arsenm on Jan 16 2017, 8:32 PM.

Download Raw Diff

Details

Reviewers

• tstellarAMD
escha

Diff Detail

Event Timeline

arsenm created this revision. (Jan 16 2017, 8:32 PM)

Herald added a reviewer: • tstellarAMD. · View Herald Transcript (Jan 16 2017, 8:32 PM)

Herald added subscribers: tony-tye, yaxunl, nhaehnle and 2 others. · View Herald Transcript

arsenm added a child revision: D28912: AMDGPU: Fold fneg into fmin/fmax_legacy. (Jan 19 2017, 12:33 PM)

ping

escha accepted this revision. (Jan 30 2017, 9:21 AM)

This revision is now accepted and ready to land. (Jan 30 2017, 9:21 AM)

LGTM with minor comment.

lib/Target/AMDGPU/AMDGPUISelLowering.cpp
2991–2992	I think these comments should be: `// fneg (fmaxnum x, y) -> fminnum (fneg x), (fneg y)` and `// fneg (fminnum x, y) -> fmaxnum (fneg x), (fneg y)`

r293968. I had to add a special case to stop folding 0s, to avoid a quality regression in shader-db; that special case should be extended to other operations, though.

Revision Contents

Path

Size

lib/

Target/

AMDGPU/

AMDGPUISelLowering.cpp

18 lines

test/

CodeGen/

AMDGPU/

fneg-combines.ll

190 lines

Diff 84626

lib/Target/AMDGPU/AMDGPUISelLowering.cpp

Show First 20 Lines • Show All 487 Lines • ▼ Show 20 Lines
LLVM_READNONE		LLVM_READNONE
static bool fnegFoldsIntoOp(unsigned Opc) {		static bool fnegFoldsIntoOp(unsigned Opc) {
switch (Opc) {		switch (Opc) {
case ISD::FADD:		case ISD::FADD:
case ISD::FSUB:		case ISD::FSUB:
case ISD::FMUL:		case ISD::FMUL:
case ISD::FMA:		case ISD::FMA:
case ISD::FMAD:		case ISD::FMAD:
		case ISD::FMINNUM:
		case ISD::FMAXNUM:
case ISD::FSIN:		case ISD::FSIN:
case ISD::FTRUNC:		case ISD::FTRUNC:
case ISD::FRINT:		case ISD::FRINT:
case ISD::FNEARBYINT:		case ISD::FNEARBYINT:
case AMDGPUISD::RCP:		case AMDGPUISD::RCP:
case AMDGPUISD::RCP_LEGACY:		case AMDGPUISD::RCP_LEGACY:
case AMDGPUISD::SIN_HW:		case AMDGPUISD::SIN_HW:
case AMDGPUISD::FMUL_LEGACY:		case AMDGPUISD::FMUL_LEGACY:
▲ Show 20 Lines • Show All 2,475 Lines • ▼ Show 20 Lines	case ISD::FMAD: {
else		else
RHS = RHS.getOperand(0);		RHS = RHS.getOperand(0);

SDValue Res = DAG.getNode(Opc, SL, VT, LHS, MHS, RHS);		SDValue Res = DAG.getNode(Opc, SL, VT, LHS, MHS, RHS);
if (!N0.hasOneUse())		if (!N0.hasOneUse())
DAG.ReplaceAllUsesWith(N0, DAG.getNode(ISD::FNEG, SL, VT, Res));		DAG.ReplaceAllUsesWith(N0, DAG.getNode(ISD::FNEG, SL, VT, Res));
return Res;		return Res;
}		}
		case ISD::FMAXNUM:
		case ISD::FMINNUM: {
		// fneg (fmaxnum x, y) -> fmaxnum (fneg x), (fneg y)
		// fneg (fminnum x, y) -> fminnum (fneg x), (fneg y)
		[Inline comment by kzhuravl — Unsubmitted, Not Done] I think these comments should be: `// fneg (fmaxnum x, y) -> fminnum (fneg x), (fneg y)` and `// fneg (fminnum x, y) -> fmaxnum (fneg x), (fneg y)`
		SDValue LHS = N0.getOperand(0);
		SDValue RHS = N0.getOperand(1);

		SDValue NegLHS = DAG.getNode(ISD::FNEG, SL, VT, LHS);
		SDValue NegRHS = DAG.getNode(ISD::FNEG, SL, VT, RHS);
		unsigned Opposite = (Opc == ISD::FMAXNUM) ? ISD::FMINNUM : ISD::FMAXNUM;

		SDValue Res = DAG.getNode(Opposite, SL, VT, NegLHS, NegRHS, N0->getFlags());
		if (!N0.hasOneUse())
		DAG.ReplaceAllUsesWith(N0, DAG.getNode(ISD::FNEG, SL, VT, Res));
		return Res;
		}
case ISD::FP_EXTEND:		case ISD::FP_EXTEND:
case ISD::FTRUNC:		case ISD::FTRUNC:
case ISD::FRINT:		case ISD::FRINT:
case ISD::FNEARBYINT: // XXX - Should fround be handled?		case ISD::FNEARBYINT: // XXX - Should fround be handled?
case ISD::FSIN:		case ISD::FSIN:
case AMDGPUISD::RCP:		case AMDGPUISD::RCP:
case AMDGPUISD::RCP_LEGACY:		case AMDGPUISD::RCP_LEGACY:
case AMDGPUISD::SIN_HW: {		case AMDGPUISD::SIN_HW: {
▲ Show 20 Lines • Show All 520 Lines • Show Last 20 Lines

test/CodeGen/AMDGPU/fneg-combines.ll

Show First 20 Lines • Show All 347 Lines • ▼ Show 20 Lines	define void @v_fneg_mul_multi_use_fneg_x_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float %c) #0 {
%fneg = fsub float -0.000000e+00, %mul		%fneg = fsub float -0.000000e+00, %mul
%use1 = fmul float %fneg.a, %c		%use1 = fmul float %fneg.a, %c
store volatile float %fneg, float addrspace(1)* %out		store volatile float %fneg, float addrspace(1)* %out
store volatile float %use1, float addrspace(1)* %out		store volatile float %use1, float addrspace(1)* %out
ret void		ret void
}		}

; --------------------------------------------------------------------------------		; --------------------------------------------------------------------------------
		; fminnum tests
		; --------------------------------------------------------------------------------

		; GCN-LABEL: {{^}}v_fneg_minnum_f32:
		; GCN: {{buffer\|flat}}_load_dword [[A:v[0-9]+]]
		; GCN: {{buffer\|flat}}_load_dword [[B:v[0-9]+]]
		; GCN: v_max_f32_e64 [[RESULT:v[0-9]+]], -[[A]], -[[B]]
		; GCN: buffer_store_dword [[RESULT]]
		define void @v_fneg_minnum_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
		%tid = call i32 @llvm.amdgcn.workitem.id.x()
		%tid.ext = sext i32 %tid to i64
		%a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
		%b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
		%out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
		%a = load volatile float, float addrspace(1)* %a.gep
		%b = load volatile float, float addrspace(1)* %b.gep
		%min = call float @llvm.minnum.f32(float %a, float %b)
		%fneg = fsub float -0.000000e+00, %min
		store float %fneg, float addrspace(1)* %out.gep
		ret void
		}

		; GCN-LABEL: {{^}}v_fneg_self_minnum_f32:
		; GCN: {{buffer\|flat}}_load_dword [[A:v[0-9]+]]
		; GCN: v_max_f32_e64 [[RESULT:v[0-9]+]], -[[A]], -[[A]]
		; GCN: buffer_store_dword [[RESULT]]
		define void @v_fneg_self_minnum_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
		%tid = call i32 @llvm.amdgcn.workitem.id.x()
		%tid.ext = sext i32 %tid to i64
		%a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
		%out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
		%a = load volatile float, float addrspace(1)* %a.gep
		%min = call float @llvm.minnum.f32(float %a, float %a)
		%min.fneg = fsub float -0.0, %min
		store float %min.fneg, float addrspace(1)* %out.gep
		ret void
		}

		; GCN-LABEL: {{^}}v_fneg_posk_minnum_f32:
		; GCN: {{buffer\|flat}}_load_dword [[A:v[0-9]+]]
		; GCN: v_max_f32_e64 [[RESULT:v[0-9]+]], -[[A]], -4.0
		; GCN: buffer_store_dword [[RESULT]]
		define void @v_fneg_posk_minnum_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
		%tid = call i32 @llvm.amdgcn.workitem.id.x()
		%tid.ext = sext i32 %tid to i64
		%a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
		%out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
		%a = load volatile float, float addrspace(1)* %a.gep
		%min = call float @llvm.minnum.f32(float 4.0, float %a)
		%fneg = fsub float -0.000000e+00, %min
		store float %fneg, float addrspace(1)* %out.gep
		ret void
		}

		; GCN-LABEL: {{^}}v_fneg_negk_minnum_f32:
		; GCN: {{buffer\|flat}}_load_dword [[A:v[0-9]+]]
		; GCN: v_max_f32_e64 [[RESULT:v[0-9]+]], -[[A]], 4.0
		; GCN: buffer_store_dword [[RESULT]]
		define void @v_fneg_negk_minnum_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
		%tid = call i32 @llvm.amdgcn.workitem.id.x()
		%tid.ext = sext i32 %tid to i64
		%a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
		%out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
		%a = load volatile float, float addrspace(1)* %a.gep
		%min = call float @llvm.minnum.f32(float -4.0, float %a)
		%fneg = fsub float -0.000000e+00, %min
		store float %fneg, float addrspace(1)* %out.gep
		ret void
		}

		; GCN-LABEL: {{^}}v_fneg_minnum_multi_use_minnum_f32:
		; GCN: {{buffer\|flat}}_load_dword [[A:v[0-9]+]]
		; GCN: {{buffer\|flat}}_load_dword [[B:v[0-9]+]]
		; GCN: v_max_f32_e64 [[MAX0:v[0-9]+]], -[[A]], -[[B]]
		; GCN-NEXT: v_mul_f32_e64 [[MUL1:v[0-9]+]], -[[MAX0]], 4.0
		; GCN-NEXT: buffer_store_dword [[MAX0]]
		; GCN-NEXT: buffer_store_dword [[MUL1]]
		define void @v_fneg_minnum_multi_use_minnum_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
		%tid = call i32 @llvm.amdgcn.workitem.id.x()
		%tid.ext = sext i32 %tid to i64
		%a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
		%b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
		%out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
		%a = load volatile float, float addrspace(1)* %a.gep
		%b = load volatile float, float addrspace(1)* %b.gep
		%min = call float @llvm.minnum.f32(float %a, float %b)
		%fneg = fsub float -0.000000e+00, %min
		%use1 = fmul float %min, 4.0
		store volatile float %fneg, float addrspace(1)* %out
		store volatile float %use1, float addrspace(1)* %out
		ret void
		}

		; --------------------------------------------------------------------------------
		; fmaxnum tests
		; --------------------------------------------------------------------------------

		; GCN-LABEL: {{^}}v_fneg_maxnum_f32:
		; GCN: {{buffer\|flat}}_load_dword [[A:v[0-9]+]]
		; GCN: {{buffer\|flat}}_load_dword [[B:v[0-9]+]]
		; GCN: v_min_f32_e64 [[RESULT:v[0-9]+]], -[[A]], -[[B]]
		; GCN: buffer_store_dword [[RESULT]]
		define void @v_fneg_maxnum_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
		%tid = call i32 @llvm.amdgcn.workitem.id.x()
		%tid.ext = sext i32 %tid to i64
		%a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
		%b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
		%out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
		%a = load volatile float, float addrspace(1)* %a.gep
		%b = load volatile float, float addrspace(1)* %b.gep
		%min = call float @llvm.maxnum.f32(float %a, float %b)
		%fneg = fsub float -0.000000e+00, %min
		store float %fneg, float addrspace(1)* %out.gep
		ret void
		}

		; GCN-LABEL: {{^}}v_fneg_self_maxnum_f32:
		; GCN: {{buffer\|flat}}_load_dword [[A:v[0-9]+]]
		; GCN: v_min_f32_e64 [[RESULT:v[0-9]+]], -[[A]], -[[A]]
		; GCN: buffer_store_dword [[RESULT]]
		define void @v_fneg_self_maxnum_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
		%tid = call i32 @llvm.amdgcn.workitem.id.x()
		%tid.ext = sext i32 %tid to i64
		%a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
		%out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
		%a = load volatile float, float addrspace(1)* %a.gep
		%min = call float @llvm.maxnum.f32(float %a, float %a)
		%min.fneg = fsub float -0.0, %min
		store float %min.fneg, float addrspace(1)* %out.gep
		ret void
		}

		; GCN-LABEL: {{^}}v_fneg_posk_maxnum_f32:
		; GCN: {{buffer\|flat}}_load_dword [[A:v[0-9]+]]
		; GCN: v_min_f32_e64 [[RESULT:v[0-9]+]], -[[A]], -4.0
		; GCN: buffer_store_dword [[RESULT]]
		define void @v_fneg_posk_maxnum_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
		%tid = call i32 @llvm.amdgcn.workitem.id.x()
		%tid.ext = sext i32 %tid to i64
		%a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
		%out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
		%a = load volatile float, float addrspace(1)* %a.gep
		%min = call float @llvm.maxnum.f32(float 4.0, float %a)
		%fneg = fsub float -0.000000e+00, %min
		store float %fneg, float addrspace(1)* %out.gep
		ret void
		}

		; GCN-LABEL: {{^}}v_fneg_negk_maxnum_f32:
		; GCN: {{buffer\|flat}}_load_dword [[A:v[0-9]+]]
		; GCN: v_min_f32_e64 [[RESULT:v[0-9]+]], -[[A]], 4.0
		; GCN: buffer_store_dword [[RESULT]]
		define void @v_fneg_negk_maxnum_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
		%tid = call i32 @llvm.amdgcn.workitem.id.x()
		%tid.ext = sext i32 %tid to i64
		%a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
		%out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
		%a = load volatile float, float addrspace(1)* %a.gep
		%min = call float @llvm.maxnum.f32(float -4.0, float %a)
		%fneg = fsub float -0.000000e+00, %min
		store float %fneg, float addrspace(1)* %out.gep
		ret void
		}

		; GCN-LABEL: {{^}}v_fneg_maxnum_multi_use_maxnum_f32:
		; GCN: {{buffer\|flat}}_load_dword [[A:v[0-9]+]]
		; GCN: {{buffer\|flat}}_load_dword [[B:v[0-9]+]]
		; GCN: v_min_f32_e64 [[MAX0:v[0-9]+]], -[[A]], -[[B]]
		; GCN-NEXT: v_mul_f32_e64 [[MUL1:v[0-9]+]], -[[MAX0]], 4.0
		; GCN-NEXT: buffer_store_dword [[MAX0]]
		; GCN-NEXT: buffer_store_dword [[MUL1]]
		define void @v_fneg_maxnum_multi_use_maxnum_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
		%tid = call i32 @llvm.amdgcn.workitem.id.x()
		%tid.ext = sext i32 %tid to i64
		%a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
		%b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
		%out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
		%a = load volatile float, float addrspace(1)* %a.gep
		%b = load volatile float, float addrspace(1)* %b.gep
		%min = call float @llvm.maxnum.f32(float %a, float %b)
		%fneg = fsub float -0.000000e+00, %min
		%use1 = fmul float %min, 4.0
		store volatile float %fneg, float addrspace(1)* %out
		store volatile float %use1, float addrspace(1)* %out
		ret void
		}

		; --------------------------------------------------------------------------------
; fma tests		; fma tests
; --------------------------------------------------------------------------------		; --------------------------------------------------------------------------------

; GCN-LABEL: {{^}}v_fneg_fma_f32:		; GCN-LABEL: {{^}}v_fneg_fma_f32:
; GCN: {{buffer\|flat}}_load_dword [[A:v[0-9]+]]		; GCN: {{buffer\|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer\|flat}}_load_dword [[B:v[0-9]+]]		; GCN: {{buffer\|flat}}_load_dword [[B:v[0-9]+]]
; GCN: {{buffer\|flat}}_load_dword [[C:v[0-9]+]]		; GCN: {{buffer\|flat}}_load_dword [[C:v[0-9]+]]
; GCN: v_fma_f32 [[RESULT:v[0-9]+]], [[A]], -[[B]], -[[C]]		; GCN: v_fma_f32 [[RESULT:v[0-9]+]], [[A]], -[[B]], -[[C]]
▲ Show 20 Lines • Show All 1,367 Lines • ▼ Show 20 Lines
declare i32 @llvm.amdgcn.workitem.id.x() #1		declare i32 @llvm.amdgcn.workitem.id.x() #1
declare float @llvm.fma.f32(float, float, float) #1		declare float @llvm.fma.f32(float, float, float) #1
declare float @llvm.fmuladd.f32(float, float, float) #1		declare float @llvm.fmuladd.f32(float, float, float) #1
declare float @llvm.sin.f32(float) #1		declare float @llvm.sin.f32(float) #1
declare float @llvm.trunc.f32(float) #1		declare float @llvm.trunc.f32(float) #1
declare float @llvm.round.f32(float) #1		declare float @llvm.round.f32(float) #1
declare float @llvm.rint.f32(float) #1		declare float @llvm.rint.f32(float) #1
declare float @llvm.nearbyint.f32(float) #1		declare float @llvm.nearbyint.f32(float) #1
		declare float @llvm.minnum.f32(float, float) #1
		declare float @llvm.maxnum.f32(float, float) #1

declare double @llvm.fma.f64(double, double, double) #1		declare double @llvm.fma.f64(double, double, double) #1

declare float @llvm.amdgcn.sin.f32(float) #1		declare float @llvm.amdgcn.sin.f32(float) #1
declare float @llvm.amdgcn.rcp.f32(float) #1		declare float @llvm.amdgcn.rcp.f32(float) #1
declare float @llvm.amdgcn.rcp.legacy(float) #1		declare float @llvm.amdgcn.rcp.legacy(float) #1
declare float @llvm.amdgcn.fmul.legacy(float, float) #1		declare float @llvm.amdgcn.fmul.legacy(float, float) #1
declare float @llvm.amdgcn.interp.p1(float, i32, i32, i32) #0		declare float @llvm.amdgcn.interp.p1(float, i32, i32, i32) #0
declare float @llvm.amdgcn.interp.p2(float, float, i32, i32, i32) #0		declare float @llvm.amdgcn.interp.p2(float, float, i32, i32, i32) #0

attributes #0 = { nounwind }		attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }		attributes #1 = { nounwind readnone }