This is an archive of the discontinued LLVM Phabricator instance.

Paths

Table of Contents

-
llvm/
-
lib/Target/PowerPC/
-
Target/
-
PowerPC/
1/4
PPCISelLowering.cpp
-
test/CodeGen/PowerPC/
-
CodeGen/
-
PowerPC/
-
fp-classify.ll

Differential D138696

[PowerPC] Exploit test data class instruction for isinf/iszero
AbandonedPublic

Authored by qiucf on Nov 25 2022, 1:48 AM.

Download Raw Diff

Details

Reviewers

nemanjai
shchenz

Group Reviewers

Restricted Project

Summary

Since ISA 3.0, we have test data class instructions (x(s|v)tstdc(s|d|q)p) to classify a floating-point value. Common FP classification operations, such as isinf or a comparison against zero, can be combined into these instructions.

Diff Detail

Repository: rG LLVM Github Monorepo

Event Timeline

qiucf created this revision.Nov 25 2022, 1:48 AM

Herald added a project: Restricted Project. · View Herald TranscriptNov 25 2022, 1:48 AM

Herald added subscribers: kbarton, hiraditya. · View Herald Transcript

qiucf requested review of this revision.Nov 25 2022, 1:48 AM

Herald added a project: Restricted Project. · View Herald TranscriptNov 25 2022, 1:48 AM

Herald added a subscriber: llvm-commits. · View Herald Transcript

Harbormaster completed remote builds in B199494: Diff 477887.Nov 25 2022, 2:28 AM

Ping

Thanks for working on this; I think it should be workable. However, it seems that adding a new PPCISD node, like PPCISD::FP_CLASS, would be preferable. You can find the same handling in the AMDGPU target's FP_CLASS node.
With this new ISD node:
1: we can leverage TableGen to select the instruction.
2: we can do further combines.
3: maybe this node can also be used for the fp class intrinsics like int_ppc_test_data_class_f and int_ppc_test_data_class_d?

llvm/lib/Target/PowerPC/PPCISelLowering.cpp
14144	Can we make these enums be ordered?
14158	Any reason we don't handle NAN and denormal inputs?

Will change to a combine to IS_FPCLASS (based on D140381)

Rebased on D140381

qiucf added a parent revision: D140381: [PowerPC] Use Power9 test data class instruction to lower IS_FPCLASS.Dec 21 2022, 1:23 AM

Harbormaster completed remote builds in B204337: Diff 484495.Dec 21 2022, 2:08 AM

shchenz added inline comments.Dec 22 2022, 8:01 PM

llvm/lib/Target/PowerPC/PPCISelLowering.cpp
14291	I have the same concern as in the previous patch: is there any reason why nan and negdenormal/posdenormal are excluded here?

qiucf added inline comments.Dec 26 2022, 6:24 PM

llvm/lib/Target/PowerPC/PPCISelLowering.cpp
14291	`isnan` uses cond code `SETUO` against an arbitrary non-NaN number, whose codegen is also a single `fcmpu`. The C standard and LLVM intrinsics don't have `issubnormal/isdenormal`. For `isnormal`, the pattern is `(abs(x) uge CONSTANT1) and (abs(x) olt CONSTANT2)`. Optimizing it in the backend may be profitable, but (1) it is out of scope for this patch; (2) clang will directly generate an `is_fpclass` call in the future.

Gentle ping...

Actually, now that I've looked over the whole patch, it is not clear to me why this is in the PPC back end. There don't seem to be any PPC-specific requirements here. Why is it not OK to put the combine in DAGCombiner.cpp? If we don't want the combine to fire on all targets/subtargets, we can just add TargetLoweringInfo::shouldCombineToIsFPClass().

I've requested changes to require that either this be moved to target independent code or a justification be provided as to why this is PPC specific.

llvm/lib/Target/PowerPC/PPCISelLowering.cpp
14275	Nit: name this something like `IsFABS` or `IsAbsVal` (my preference would be the first one).

This revision now requires changes to proceed.Feb 20 2023, 7:32 AM

I'd like to abandon this one since a frontend change will eliminate this problem.

Revision Contents

Path

Size

llvm/

lib/

Target/

PowerPC/

PPCISelLowering.cpp

33 lines

test/

CodeGen/

PowerPC/

fp-classify.ll

35 lines

Diff 484495

llvm/lib/Target/PowerPC/PPCISelLowering.cpp

Context not available.
	// a binary operator match, so go through the list in reverse so that	// a binary operator match, so go through the list in reverse so that
	// we've likely promoted both operands first.	// we've likely promoted both operands first.
	while (!PromOpHandles.empty()) {	while (!PromOpHandles.empty()) {
	SDValue PromOp = PromOpHandles.back().getValue();	SDValue PromOp = PromOpHandles.back().getValue();
	PromOpHandles.pop_back();	PromOpHandles.pop_back();

	unsigned C;	unsigned C;
		shchenzUnsubmitted Not Done Reply Inline Actions Any reason we don't handle NAN and denormal inputs? shchenz: Any reason we don't handle NAN and denormal inputs?
	switch (PromOp.getOpcode()) {	switch (PromOp.getOpcode()) {
	default: C = 0; break;	default: C = 0; break;
	case ISD::SELECT: C = 1; break;	case ISD::SELECT: C = 1; break;
	case ISD::SELECT_CC: C = 2; break;	case ISD::SELECT_CC: C = 2; break;
	}	}

	if ((!isa<ConstantSDNode>(PromOp.getOperand(C)) &&	if ((!isa<ConstantSDNode>(PromOp.getOperand(C)) &&
	PromOp.getOperand(C).getValueType() != N->getValueType(0)) \|\|	PromOp.getOperand(C).getValueType() != N->getValueType(0)) \|\|
	▲ Show 20 Lines • Show All 76 Lines • ▼ Show 20 Lines
	DAG.getNode(ISD::SHL, dl, N->getValueType(0), N->getOperand(0), ShiftCst),	DAG.getNode(ISD::SHL, dl, N->getValueType(0), N->getOperand(0), ShiftCst),
	ShiftCst);	ShiftCst);
	}	}

	SDValue PPCTargetLowering::combineSetCC(SDNode *N,	SDValue PPCTargetLowering::combineSetCC(SDNode *N,
	DAGCombinerInfo &DCI) const {	DAGCombinerInfo &DCI) const {
	assert(N->getOpcode() == ISD::SETCC &&	assert(N->getOpcode() == ISD::SETCC &&
	"Should be called with a SETCC node");	"Should be called with a SETCC node");
		EVT VT = N->getValueType(0);
		SDValue LHS = N->getOperand(0);
		SDValue RHS = N->getOperand(1);
		EVT OpVT = LHS.getValueType();
		SDLoc DL(N);
		SelectionDAG &DAG = DCI.DAG;

	ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();	ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
	if (CC == ISD::SETNE \|\| CC == ISD::SETEQ) {	if (CC == ISD::SETNE \|\| CC == ISD::SETEQ) {
	SDValue LHS = N->getOperand(0);
	SDValue RHS = N->getOperand(1);

	// If there is a '0 - y' pattern, canonicalize the pattern to the RHS.	// If there is a '0 - y' pattern, canonicalize the pattern to the RHS.
	if (LHS.getOpcode() == ISD::SUB && isNullConstant(LHS.getOperand(0)) &&	if (LHS.getOpcode() == ISD::SUB && isNullConstant(LHS.getOperand(0)) &&
	LHS.hasOneUse())	LHS.hasOneUse())
	std::swap(LHS, RHS);	std::swap(LHS, RHS);

	// x == 0-y --> x+y == 0	// x == 0-y --> x+y == 0
	// x != 0-y --> x+y != 0	// x != 0-y --> x+y != 0
	if (RHS.getOpcode() == ISD::SUB && isNullConstant(RHS.getOperand(0)) &&	if (RHS.getOpcode() == ISD::SUB && isNullConstant(RHS.getOperand(0)) &&
	RHS.hasOneUse()) {	RHS.hasOneUse()) {
	SDLoc DL(N);
	SelectionDAG &DAG = DCI.DAG;
	EVT VT = N->getValueType(0);
	EVT OpVT = LHS.getValueType();	EVT OpVT = LHS.getValueType();
	SDValue Add = DAG.getNode(ISD::ADD, DL, OpVT, LHS, RHS.getOperand(1));	SDValue Add = DAG.getNode(ISD::ADD, DL, OpVT, LHS, RHS.getOperand(1));
	return DAG.getSetCC(DL, VT, Add, DAG.getConstant(0, DL, OpVT), CC);	return DAG.getSetCC(DL, VT, Add, DAG.getConstant(0, DL, OpVT), CC);
	}	}
		} else if (CC == ISD::SETOEQ && Subtarget.hasP9Vector() && !OpVT.isVector()) {
		bool IsAbsolute = LHS.getOpcode() == ISD::FABS;
		nemanjaiUnsubmitted Not Done Reply Inline Actions Nit: name this something like `IsFABS` or `IsAbsVal` (my preference would be the first one). nemanjai: Nit: name this something like `IsFABS` or `IsAbsVal` (my preference would be the first one).
		if (IsAbsolute)
		LHS = LHS.getOperand(0);

		if (const auto *CFP = dyn_cast<ConstantFPSDNode>(RHS.getNode())) {
		const APFloat &APF = CFP->getValueAPF();
		unsigned Flag = 0;
		if (APF.isNegative() && IsAbsolute)
		return DAG.getBoolConstant(false, DL, VT, OpVT);
		if (APF.isPosInfinity())
		Flag = IsAbsolute ? fcInf : fcPosInf;
		else if (APF.isPosZero())
		Flag = IsAbsolute ? fcZero : fcPosZero;
		else if (APF.isNegInfinity())
		Flag = fcNegInf;
		else if (APF.isNegZero())
		Flag = fcNegZero;
		shchenzUnsubmitted Not Done Reply Inline Actions I have same concern as previous patch: any reason for the nan and negdenormal/posdenormal are excluded here? shchenz: I have same concern as previous patch: any reason for the nan and negdenormal/posdenormal are…
		qiucfAuthorUnsubmitted Done Reply Inline Actions `isnan` uses cond code `SETUO` against arbitary non-nan number, whose codegen is also a single `fcmpu`. C std or LLVM intrinsics don't have `issubnormal/isdenormal`. For `isnormal`, the pattern is `(abs(x) uge CONSTANT1) and (abs(x) olt CONSTANT2)`. Optimizing it in backend may be profitable, but (1) is out of scope of this patch; (2) clang will directly generate `is_fpclass` call in the future qiucf: `isnan` uses cond code `SETUO` against arbitary non-nan number, whose codegen is also a single…
		if (Flag)
		return DAG.getNode(ISD::IS_FPCLASS, DL, VT, LHS,
		DAG.getConstant(Flag, DL, MVT::i32));
		}
	}	}

	return DAGCombineTruncBoolExt(N, DCI);	return DAGCombineTruncBoolExt(N, DCI);
	}	}

	// Is this an extending load from an f32 to an f64?	// Is this an extending load from an f32 to an f64?
	static bool isFPExtLoad(SDValue Op) {	static bool isFPExtLoad(SDValue Op) {
	if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Op.getNode()))	if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Op.getNode()))
	▲ Show 20 Lines • Show All 91 Lines • Show Last 20 Lines

llvm/test/CodeGen/PowerPC/fp-classify.ll

	Show All 12 Lines
	; P8-NEXT: lfs 1, .LCPI0_0@toc@l(3)			; P8-NEXT: lfs 1, .LCPI0_0@toc@l(3)
	; P8-NEXT: li 3, 0			; P8-NEXT: li 3, 0
	; P8-NEXT: fcmpu 0, 0, 1			; P8-NEXT: fcmpu 0, 0, 1
	; P8-NEXT: iseleq 3, 4, 3			; P8-NEXT: iseleq 3, 4, 3
	; P8-NEXT: blr			; P8-NEXT: blr
	;			;
	; P9-LABEL: abs_isinff:			; P9-LABEL: abs_isinff:
	; P9: # %bb.0: # %entry			; P9: # %bb.0: # %entry
	; P9-NEXT: addis 3, 2, .LCPI0_0@toc@ha			; P9-NEXT: xststdcsp 0, 1, 48
	; P9-NEXT: xsabsdp 0, 1
	; P9-NEXT: li 4, 1
	; P9-NEXT: lfs 1, .LCPI0_0@toc@l(3)
	; P9-NEXT: li 3, 0			; P9-NEXT: li 3, 0
	; P9-NEXT: fcmpu 0, 0, 1			; P9-NEXT: li 4, 1
	; P9-NEXT: iseleq 3, 4, 3			; P9-NEXT: iseleq 3, 4, 3
	; P9-NEXT: blr			; P9-NEXT: blr
	entry:			entry:
	%0 = tail call float @llvm.fabs.f32(float %x)			%0 = tail call float @llvm.fabs.f32(float %x)
	%cmpinf = fcmp oeq float %0, 0x7FF0000000000000			%cmpinf = fcmp oeq float %0, 0x7FF0000000000000
	ret i1 %cmpinf			ret i1 %cmpinf
	}			}

	define zeroext i1 @abs_isinf(double %x) {			define zeroext i1 @abs_isinf(double %x) {
	; P8-LABEL: abs_isinf:			; P8-LABEL: abs_isinf:
	; P8: # %bb.0: # %entry			; P8: # %bb.0: # %entry
	; P8-NEXT: xsabsdp 0, 1			; P8-NEXT: xsabsdp 0, 1
	; P8-NEXT: addis 3, 2, .LCPI1_0@toc@ha			; P8-NEXT: addis 3, 2, .LCPI1_0@toc@ha
	; P8-NEXT: li 4, 1			; P8-NEXT: li 4, 1
	; P8-NEXT: lfs 1, .LCPI1_0@toc@l(3)			; P8-NEXT: lfs 1, .LCPI1_0@toc@l(3)
	; P8-NEXT: li 3, 0			; P8-NEXT: li 3, 0
	; P8-NEXT: fcmpu 0, 0, 1			; P8-NEXT: fcmpu 0, 0, 1
	; P8-NEXT: iseleq 3, 4, 3			; P8-NEXT: iseleq 3, 4, 3
	; P8-NEXT: blr			; P8-NEXT: blr
	;			;
	; P9-LABEL: abs_isinf:			; P9-LABEL: abs_isinf:
	; P9: # %bb.0: # %entry			; P9: # %bb.0: # %entry
	; P9-NEXT: addis 3, 2, .LCPI1_0@toc@ha			; P9-NEXT: xststdcdp 0, 1, 48
	; P9-NEXT: xsabsdp 0, 1
	; P9-NEXT: li 4, 1
	; P9-NEXT: lfs 1, .LCPI1_0@toc@l(3)
	; P9-NEXT: li 3, 0			; P9-NEXT: li 3, 0
	; P9-NEXT: fcmpu 0, 0, 1			; P9-NEXT: li 4, 1
	; P9-NEXT: iseleq 3, 4, 3			; P9-NEXT: iseleq 3, 4, 3
	; P9-NEXT: blr			; P9-NEXT: blr
	entry:			entry:
	%0 = tail call double @llvm.fabs.f64(double %x)			%0 = tail call double @llvm.fabs.f64(double %x)
	%cmpinf = fcmp oeq double %0, 0x7FF0000000000000			%cmpinf = fcmp oeq double %0, 0x7FF0000000000000
	ret i1 %cmpinf			ret i1 %cmpinf
	}			}

	Show All 23 Lines
	; P8-NEXT: srwi 3, 3, 5			; P8-NEXT: srwi 3, 3, 5
	; P8-NEXT: addi 1, 1, 48			; P8-NEXT: addi 1, 1, 48
	; P8-NEXT: ld 0, 16(1)			; P8-NEXT: ld 0, 16(1)
	; P8-NEXT: mtlr 0			; P8-NEXT: mtlr 0
	; P8-NEXT: blr			; P8-NEXT: blr
	;			;
	; P9-LABEL: abs_isinfq:			; P9-LABEL: abs_isinfq:
	; P9: # %bb.0: # %entry			; P9: # %bb.0: # %entry
	; P9-NEXT: addis 3, 2, .LCPI2_0@toc@ha			; P9-NEXT: xststdcqp 0, 2, 48
	; P9-NEXT: xsabsqp 2, 2
	; P9-NEXT: li 4, 1
	; P9-NEXT: addi 3, 3, .LCPI2_0@toc@l
	; P9-NEXT: lxv 35, 0(3)
	; P9-NEXT: li 3, 0			; P9-NEXT: li 3, 0
	; P9-NEXT: xscmpuqp 0, 2, 3			; P9-NEXT: li 4, 1
	; P9-NEXT: iseleq 3, 4, 3			; P9-NEXT: iseleq 3, 4, 3
	; P9-NEXT: blr			; P9-NEXT: blr
	entry:			entry:
	%0 = tail call fp128 @llvm.fabs.f128(fp128 %x)			%0 = tail call fp128 @llvm.fabs.f128(fp128 %x)
	%cmpinf = fcmp oeq fp128 %0, 0xL00000000000000007FFF000000000000			%cmpinf = fcmp oeq fp128 %0, 0xL00000000000000007FFF000000000000
	ret i1 %cmpinf			ret i1 %cmpinf
	}			}

	▲ Show 20 Lines • Show All 54 Lines • ▼ Show 20 Lines
	; P8-NEXT: li 3, 0			; P8-NEXT: li 3, 0
	; P8-NEXT: li 4, 1			; P8-NEXT: li 4, 1
	; P8-NEXT: fcmpu 0, 1, 0			; P8-NEXT: fcmpu 0, 1, 0
	; P8-NEXT: iseleq 3, 4, 3			; P8-NEXT: iseleq 3, 4, 3
	; P8-NEXT: blr			; P8-NEXT: blr
	;			;
	; P9-LABEL: iszerof:			; P9-LABEL: iszerof:
	; P9: # %bb.0: # %entry			; P9: # %bb.0: # %entry
	; P9-NEXT: xxlxor 0, 0, 0			; P9-NEXT: xststdcsp 0, 1, 8
	; P9-NEXT: li 3, 0			; P9-NEXT: li 3, 0
	; P9-NEXT: li 4, 1			; P9-NEXT: li 4, 1
	; P9-NEXT: fcmpu 0, 1, 0
	; P9-NEXT: iseleq 3, 4, 3			; P9-NEXT: iseleq 3, 4, 3
	; P9-NEXT: blr			; P9-NEXT: blr
	entry:			entry:
	%cmp = fcmp oeq float %x, 0.000000e+00			%cmp = fcmp oeq float %x, 0.000000e+00
	ret i1 %cmp			ret i1 %cmp
	}			}

	define zeroext i1 @iszero(double %x) {			define zeroext i1 @iszero(double %x) {
	; P8-LABEL: iszero:			; P8-LABEL: iszero:
	; P8: # %bb.0: # %entry			; P8: # %bb.0: # %entry
	; P8-NEXT: xxlxor 0, 0, 0			; P8-NEXT: xxlxor 0, 0, 0
	; P8-NEXT: li 3, 0			; P8-NEXT: li 3, 0
	; P8-NEXT: li 4, 1			; P8-NEXT: li 4, 1
	; P8-NEXT: fcmpu 0, 1, 0			; P8-NEXT: fcmpu 0, 1, 0
	; P8-NEXT: iseleq 3, 4, 3			; P8-NEXT: iseleq 3, 4, 3
	; P8-NEXT: blr			; P8-NEXT: blr
	;			;
	; P9-LABEL: iszero:			; P9-LABEL: iszero:
	; P9: # %bb.0: # %entry			; P9: # %bb.0: # %entry
	; P9-NEXT: xxlxor 0, 0, 0			; P9-NEXT: xststdcdp 0, 1, 8
	; P9-NEXT: li 3, 0			; P9-NEXT: li 3, 0
	; P9-NEXT: li 4, 1			; P9-NEXT: li 4, 1
	; P9-NEXT: fcmpu 0, 1, 0
	; P9-NEXT: iseleq 3, 4, 3			; P9-NEXT: iseleq 3, 4, 3
	; P9-NEXT: blr			; P9-NEXT: blr
	entry:			entry:
	%cmp = fcmp oeq double %x, 0.000000e+00			%cmp = fcmp oeq double %x, 0.000000e+00
	ret i1 %cmp			ret i1 %cmp
	}			}

	define zeroext i1 @iszeroq(fp128 %x) {			define zeroext i1 @iszeroq(fp128 %x) {
	Show All 14 Lines
	; P8-NEXT: srwi 3, 3, 5			; P8-NEXT: srwi 3, 3, 5
	; P8-NEXT: addi 1, 1, 32			; P8-NEXT: addi 1, 1, 32
	; P8-NEXT: ld 0, 16(1)			; P8-NEXT: ld 0, 16(1)
	; P8-NEXT: mtlr 0			; P8-NEXT: mtlr 0
	; P8-NEXT: blr			; P8-NEXT: blr
	;			;
	; P9-LABEL: iszeroq:			; P9-LABEL: iszeroq:
	; P9: # %bb.0: # %entry			; P9: # %bb.0: # %entry
	; P9-NEXT: addis 3, 2, .LCPI7_0@toc@ha			; P9-NEXT: xststdcqp 0, 2, 8
	; P9-NEXT: li 4, 1
	; P9-NEXT: addi 3, 3, .LCPI7_0@toc@l
	; P9-NEXT: lxv 35, 0(3)
	; P9-NEXT: li 3, 0			; P9-NEXT: li 3, 0
	; P9-NEXT: xscmpuqp 0, 2, 3			; P9-NEXT: li 4, 1
	; P9-NEXT: iseleq 3, 4, 3			; P9-NEXT: iseleq 3, 4, 3
	; P9-NEXT: blr			; P9-NEXT: blr
	entry:			entry:
	%cmp = fcmp oeq fp128 %x, 0xL00000000000000000000000000000000			%cmp = fcmp oeq fp128 %x, 0xL00000000000000000000000000000000
	ret i1 %cmp			ret i1 %cmp
	}			}

	define <4 x i1> @iszerov4f32(<4 x float> %x) {			define <4 x i1> @iszerov4f32(<4 x float> %x) {
	Show All 38 Lines