This is an archive of the discontinued LLVM Phabricator instance.

Paths

Table of Contents

-
llvm/
-
lib/Target/X86/
-
Target/
-
X86/
2/4
X86ISelLowering.cpp
-
test/CodeGen/X86/
-
CodeGen/
-
X86/
1/2
half.ll

Differential D145867

[X86][FP16] Optimize FMAXNUM/FMINNUM into SMAX/SMIN for FP16 emulation under fast math
Changes PlannedPublic

Authored by pengfei on Mar 12 2023, 4:42 AM.

Download Raw Diff

Details

Reviewers

RKSimon
LuoYuanke
skan

Summary

We don't need to promote to f32 FMAXNUM/FMINNUM if we can make sure
there are no NaNs in the inputs.

Fixes #61271

Diff Detail

Repository: rG LLVM Github Monorepo

Event Timeline

pengfei created this revision.Mar 12 2023, 4:42 AM

Herald added a project: Restricted Project. · View Herald TranscriptMar 12 2023, 4:42 AM

Herald added a subscriber: hiraditya. · View Herald Transcript

pengfei requested review of this revision.Mar 12 2023, 4:42 AM

Herald added a project: Restricted Project. · View Herald TranscriptMar 12 2023, 4:42 AM

Herald added a subscriber: llvm-commits. · View Herald Transcript

RKSimon added inline comments.Mar 12 2023, 5:02 AM

llvm/lib/Target/X86/X86ISelLowering.cpp
53165	We might need to improve soft float test coverage?
llvm/test/CodeGen/X86/half.ll
1368	It seems a shame to scalarize this when we're already on the FPU, and we have pminsw/pmaxsw since SSE2

Harbormaster completed remote builds in B218867: Diff 504421.Mar 12 2023, 5:33 AM

Address review comments.

llvm/lib/Target/X86/X86ISelLowering.cpp
53165	Good catch! I didn't intend to support soft float, and IIRC we have problems supporting soft float, so bail out in that case.
llvm/test/CodeGen/X86/half.ll
1368	Good point! We can vectorize it first, though we still need to improve the `f16` extract.

pengfei mentioned this in D145870: [X86][FP16] Combine bitcast(extract_vector_elt vXi16, N) to extract_vector_elt(bitcast(vXi16), N).Mar 12 2023, 6:52 AM

Harbormaster completed remote builds in B218870: Diff 504425.Mar 12 2023, 7:04 AM

LuoYuanke added inline comments.Mar 12 2023, 5:03 PM

llvm/lib/Target/X86/X86ISelLowering.cpp
53175	I forget the encoding of the exponent. Is the result the same for min(vXf16) and min(i16)?

The negative comparison is not correct.

llvm/lib/Target/X86/X86ISelLowering.cpp
53175	Thanks for the reminder. Reviewing it again, I found it is only correct for positive values. The reason is that FP is represented in `Sign-Magnitude` while integers use `2’s Complement Code`. Let me try to think of some way to solve it.

Matt added a subscriber: Matt.Aug 9 2023, 3:52 PM

Revision Contents

Path

Size

llvm/

lib/

Target/

X86/

X86ISelLowering.cpp

17 lines

test/

CodeGen/

X86/

half.ll

33 lines

Diff 504421

llvm/lib/Target/X86/X86ISelLowering.cpp

This file is larger than 256 KB, so syntax highlighting is disabled by default.

	Show First 20 Lines • Show All 32,759 Lines • ▼ Show 20 Lines
	}			}

	return DAG.getNode(NewOp, SDLoc(N), N->getValueType(0),			return DAG.getNode(NewOp, SDLoc(N), N->getValueType(0),
	N->getOperand(0), N->getOperand(1));			N->getOperand(0), N->getOperand(1));
	}			}

	static SDValue combineFMinNumFMaxNum(SDNode *N, SelectionDAG &DAG,			static SDValue combineFMinNumFMaxNum(SDNode *N, SelectionDAG &DAG,
	const X86Subtarget &Subtarget) {			const X86Subtarget &Subtarget) {
				SDValue Op0 = N->getOperand(0);
				SDValue Op1 = N->getOperand(1);
				SDLoc DL(N);

	EVT VT = N->getValueType(0);			EVT VT = N->getValueType(0);
	if (Subtarget.useSoftFloat() \|\| isSoftFP16(VT, Subtarget))			if (Subtarget.useSoftFloat() \|\| isSoftFP16(VT, Subtarget)) {
				RKSimonUnsubmitted Not Done Reply Inline Actions We might need to improve soft float test coverage? RKSimon: We might need to improve soft float test coverage?
				pengfeiAuthorUnsubmitted Done Reply Inline Actions Good catch! I didn't intend to support soft float. And IIRC, we have problems in supporting soft float. So bail it out. pengfei: Good catch! I didn't intend to support soft float. And IIRC, we have problems in supporting…
				if (DAG.getTarget().Options.NoNaNsFPMath \|\| N->getFlags().hasNoNaNs()) {
				auto MinMaxOp = N->getOpcode() == ISD::FMAXNUM ? ISD::SMAX : ISD::SMIN;
				EVT NVT = VT.changeTypeToInteger();
				return DAG.getBitcast(VT, DAG.getNode(MinMaxOp, DL, NVT,
				DAG.getBitcast(NVT, Op0),
				DAG.getBitcast(NVT, Op1)));
				}
	return SDValue();			return SDValue();
				}

				LuoYuankeUnsubmitted Not Done Reply Inline Actions I forget the encoding of exponent. Is the result same for min(vXf16) and min(i16)? LuoYuanke: I forget the encoding of exponent. Is the result same for min(vXf16) and min(i16)?
				pengfeiAuthorUnsubmitted Done Reply Inline Actions Thanks for the reminding. Review it again, I found it is only correct to positive value. The reason is FP is represented in `Sign-Magnitude` while integer is `2’s Complement Code`. Let me try to think out some way to solve it. pengfei: Thanks for the reminding. Review it again, I found it is only correct to positive value. The…
	const TargetLowering &TLI = DAG.getTargetLoweringInfo();			const TargetLowering &TLI = DAG.getTargetLoweringInfo();

	if (!((Subtarget.hasSSE1() && VT == MVT::f32) \|\|			if (!((Subtarget.hasSSE1() && VT == MVT::f32) \|\|
	(Subtarget.hasSSE2() && VT == MVT::f64) \|\|			(Subtarget.hasSSE2() && VT == MVT::f64) \|\|
	(Subtarget.hasFP16() && VT == MVT::f16) \|\|			(Subtarget.hasFP16() && VT == MVT::f16) \|\|
	(VT.isVector() && TLI.isTypeLegal(VT))))			(VT.isVector() && TLI.isTypeLegal(VT))))
	return SDValue();			return SDValue();

	SDValue Op0 = N->getOperand(0);
	SDValue Op1 = N->getOperand(1);
	SDLoc DL(N);
	auto MinMaxOp = N->getOpcode() == ISD::FMAXNUM ? X86ISD::FMAX : X86ISD::FMIN;			auto MinMaxOp = N->getOpcode() == ISD::FMAXNUM ? X86ISD::FMAX : X86ISD::FMIN;

	// If we don't have to respect NaN inputs, this is a direct translation to x86			// If we don't have to respect NaN inputs, this is a direct translation to x86
	// min/max instructions.			// min/max instructions.
	if (DAG.getTarget().Options.NoNaNsFPMath \|\| N->getFlags().hasNoNaNs())			if (DAG.getTarget().Options.NoNaNsFPMath \|\| N->getFlags().hasNoNaNs())
	return DAG.getNode(MinMaxOp, DL, VT, Op0, Op1, N->getFlags());			return DAG.getNode(MinMaxOp, DL, VT, Op0, Op1, N->getFlags());

	// If one of the operands is known non-NaN use the native min/max instructions			// If one of the operands is known non-NaN use the native min/max instructions
	▲ Show 20 Lines • Show All 5,495 Lines • Show Last 20 Lines

llvm/test/CodeGen/X86/half.ll

	Show First 20 Lines • Show All 1,349 Lines • ▼ Show 20 Lines
	; CHECK-I686-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]			; CHECK-I686-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
	; CHECK-I686-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,2,2,2]			; CHECK-I686-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,2,2,2]
	; CHECK-I686-NEXT: retl			; CHECK-I686-NEXT: retl
	%1 = load <8 x half>, ptr %p, align 8			%1 = load <8 x half>, ptr %p, align 8
	%2 = shufflevector <8 x half> %1, <8 x half> poison, <8 x i32> <i32 4, i32 4, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>			%2 = shufflevector <8 x half> %1, <8 x half> poison, <8 x i32> <i32 4, i32 4, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
	ret <8 x half> %2			ret <8 x half> %2
	}			}

				declare half @llvm.minnum.f16(half, half)

				define half @pr61271(half %0, half %1) #0 {
				; CHECK-LIBCALL-LABEL: pr61271:
				; CHECK-LIBCALL: # %bb.0:
				; CHECK-LIBCALL-NEXT: pextrw $0, %xmm1, %eax
				; CHECK-LIBCALL-NEXT: pextrw $0, %xmm0, %ecx
				; CHECK-LIBCALL-NEXT: cmpw %ax, %cx
				; CHECK-LIBCALL-NEXT: cmovll %ecx, %eax
				; CHECK-LIBCALL-NEXT: pinsrw $0, %eax, %xmm0
				; CHECK-LIBCALL-NEXT: retq
				RKSimonUnsubmitted Not Done Reply Inline Actions It seems a shame to scalarize this when we're already on the FPU, and we have pminsw/pmaxsw since SSE2 RKSimon: It seems a shame to scalarize this when we're already on the FPU, and we have pminsw/pmaxsw…
				pengfeiAuthorUnsubmitted Done Reply Inline Actions Good point! We can vectorize it first, though we still need to improve the `f16` extract. pengfei: Good point! We can vectorize it first, though we still need to improve the `f16` extract.
				;
				; BWON-F16C-LABEL: pr61271:
				; BWON-F16C: # %bb.0:
				; BWON-F16C-NEXT: vpextrw $0, %xmm1, %eax
				; BWON-F16C-NEXT: vpextrw $0, %xmm0, %ecx
				; BWON-F16C-NEXT: cmpw %ax, %cx
				; BWON-F16C-NEXT: cmovll %ecx, %eax
				; BWON-F16C-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0
				; BWON-F16C-NEXT: retq
				;
				; CHECK-I686-LABEL: pr61271:
				; CHECK-I686: # %bb.0:
				; CHECK-I686-NEXT: movl {{[0-9]+}}(%esp), %eax
				; CHECK-I686-NEXT: movl {{[0-9]+}}(%esp), %ecx
				; CHECK-I686-NEXT: cmpw %ax, %cx
				; CHECK-I686-NEXT: cmovll %ecx, %eax
				; CHECK-I686-NEXT: pinsrw $0, %eax, %xmm0
				; CHECK-I686-NEXT: retl
				%3 = call fast half @llvm.minnum.f16(half %0, half %1)
				ret half %3
				}

	attributes #0 = { nounwind }			attributes #0 = { nounwind }