This is an archive of the discontinued LLVM Phabricator instance.

DAG: Preserve FMF when creating fminnum/fmaxnum
AbandonedPublic

Authored by arsenm on Sep 4 2018, 11:06 AM.

Download Raw Diff

Details

Reviewers

spatel
efriedma

Summary

These should preserve the fast math flags on the initial fcmp.
Without this, the nodes may later be expanded to use an
unnecessary quieting operation.

Diff Detail

Event Timeline

arsenm created this revision. Sep 4 2018, 11:06 AM

Herald added a subscriber: wdng. · View Herald Transcript · Sep 4 2018, 11:06 AM

Is this just about 'nnan' behavior? If so, can we just use/expand the existing SelectPatternNaNBehavior?

IOW, why is 'nsz' relevant?
Eli raised a question about 'nsz' on fcmp:
https://bugs.llvm.org/show_bug.cgi?id=38086

Does the motivation for this patch make any of the proposed fixes more or less appealing?

Regardless of the answers to the above, I think we should split this into separate IR/DAG patches with tests for each side.

spatel mentioned this in D51145: Guard FMF context by excluding some FP operators from FPMathOperator. Sep 5 2018, 10:07 AM

In D51646#1224799, @spatel wrote:

Is this just about 'nnan' behavior? If so, can we just use/expand the existing SelectPatternNaNBehavior?

IOW, why is 'nsz' relevant?
Eli raised a question about 'nsz' on fcmp:
https://bugs.llvm.org/show_bug.cgi?id=38086

Does the motivation for this patch make any of the proposed fixes more or less appealing?

Regardless of the answers to the above, I think we should split this into separate IR/DAG patches with tests for each side.

The combine to form minnum/maxnum from cmp/select depends on nsz. The DAG version of the combine checks the global NSZ flag, but I think the direct-from-IR version is broken. I've never really liked the matching here in SelectionDAGBuilder, but I haven't looked into what regresses if I just rip out the IR matching.

There's also another bug I've noticed from doing this: dead nodes are left behind, which breaks hasOneUse optimizations.

arsenm added a child revision: D51701: ValueTracking: Report fast math flags for fcmp/select. Sep 5 2018, 11:56 AM

arsenm added inline comments.

lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
1091–1094	I forgot about this problem. I don't understand why it was trying to strip these flags in the first place, but it will always do it

Split out DAG part

mcberg2017 added a subscriber: mcberg2017. Sep 5 2018, 1:07 PM

aemerson added a subscriber: aemerson. Sep 5 2018, 5:08 PM

aemerson added inline comments.

lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
1091–1094	Maybe this is related: on D51145 we've seen that if the FPMathOperator doesn't actually have any FP flags (like insert/extractelement), then the incoming flags result in stripping.

arsenm added a child revision: D51737: DAG: Combine extract_vector_elt of concat_vectors. Sep 6 2018, 10:06 AM

kpn added a subscriber: kpn. Sep 6 2018, 10:17 AM

arsenm abandoned this revision. Apr 5 2020, 7:40 AM

Revision Contents

Path

Size

include/

llvm/

CodeGen/

SelectionDAGNodes.h

19 lines

lib/

CodeGen/

SelectionDAG/

SelectionDAGBuilder.cpp

7 lines

Diff 164095

include/llvm/CodeGen/SelectionDAGNodes.h

Show First 20 Lines • Show All 372 Lines • ▼ Show 20 Lines	public:
/// Default constructor turns off all optimization flags.		/// Default constructor turns off all optimization flags.
SDNodeFlags()		SDNodeFlags()
: AnyDefined(false), NoUnsignedWrap(false), NoSignedWrap(false),		: AnyDefined(false), NoUnsignedWrap(false), NoSignedWrap(false),
Exact(false), NoNaNs(false), NoInfs(false),		Exact(false), NoNaNs(false), NoInfs(false),
NoSignedZeros(false), AllowReciprocal(false), VectorReduction(false),		NoSignedZeros(false), AllowReciprocal(false), VectorReduction(false),
AllowContract(false), ApproximateFuncs(false),		AllowContract(false), ApproximateFuncs(false),
AllowReassociation(false) {}		AllowReassociation(false) {}

		/// Propagate the fast-math-flags from IR FastMathFlags
		void copyFMF(FastMathFlags FMF) {
		setNoNaNs(FMF.noNaNs());
		setNoInfs(FMF.noInfs());
		setNoSignedZeros(FMF.noSignedZeros());
		setAllowReciprocal(FMF.allowReciprocal());
		setAllowContract(FMF.allowContract());
		setApproximateFuncs(FMF.approxFunc());
		setAllowReassociation(FMF.allowReassoc());
		}

/// Propagate the fast-math-flags from an IR FPMathOperator.		/// Propagate the fast-math-flags from an IR FPMathOperator.
void copyFMF(const FPMathOperator &FPMO) {		void copyFMF(const FPMathOperator &FPMO) {
setNoNaNs(FPMO.hasNoNaNs());		copyFMF(FPMO.getFastMathFlags());
setNoInfs(FPMO.hasNoInfs());
setNoSignedZeros(FPMO.hasNoSignedZeros());
setAllowReciprocal(FPMO.hasAllowReciprocal());
setAllowContract(FPMO.hasAllowContract());
setApproximateFuncs(FPMO.hasApproxFunc());
setAllowReassociation(FPMO.hasAllowReassoc());
}		}

/// Sets the state of the flags to the defined state.		/// Sets the state of the flags to the defined state.
void setDefined() { AnyDefined = true; }		void setDefined() { AnyDefined = true; }
/// Returns true if the flags are in a defined state.		/// Returns true if the flags are in a defined state.
bool isDefined() const { return AnyDefined; }		bool isDefined() const { return AnyDefined; }

// These are mutators for each flag.		// These are mutators for each flag.
▲ Show 20 Lines • Show All 2,077 Lines • Show Last 20 Lines

lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 1,082 Lines • ▼ Show 20 Lines	if (auto *FPMO = dyn_cast<FPMathOperator>(&I)) {
// TODO: We could handle all flags (nsw, etc) here.		// TODO: We could handle all flags (nsw, etc) here.
// TODO: If an IR instruction maps to >1 node, only the final node will have		// TODO: If an IR instruction maps to >1 node, only the final node will have
// flags set.		// flags set.
if (SDNode *Node = getNodeForIRValue(&I)) {		if (SDNode *Node = getNodeForIRValue(&I)) {
SDNodeFlags IncomingFlags;		SDNodeFlags IncomingFlags;
IncomingFlags.copyFMF(*FPMO);		IncomingFlags.copyFMF(*FPMO);
if (!Node->getFlags().isDefined())		if (!Node->getFlags().isDefined())
Node->setFlags(IncomingFlags);		Node->setFlags(IncomingFlags);
		#if 0
else		else
Node->intersectFlagsWith(IncomingFlags);		Node->intersectFlagsWith(IncomingFlags);
		#endif
		arsenmAuthorUnsubmitted Not Done Reply Inline Actions I forgot about this problem. I don't understand why it was trying to strip these flags in the first place, but it will always do it arsenm: I forgot about this problem. I don't understand why it was trying to strip these flags in the…
		aemersonUnsubmitted Not Done Reply Inline Actions Maybe this is related, we've seen on D51145 that if the FPMathOperator doesn't actually have any FP flags (like insert/extractelements) then the incoming flags result in stripping. aemerson: Maybe this is related, we've seen on D51145 that if the FPMathOperator doesn't actually have…
}		}
}		}

if (!I.isTerminator() && !HasTailCall &&		if (!I.isTerminator() && !HasTailCall &&
!isStatepoint(&I)) // statepoints handle their exports internally		!isStatepoint(&I)) // statepoints handle their exports internally
CopyToExportRegsIfNeeded(&I);		CopyToExportRegsIfNeeded(&I);

CurInst = nullptr;		CurInst = nullptr;
▲ Show 20 Lines • Show All 1,840 Lines • ▼ Show 20 Lines	void SelectionDAGBuilder::visitSelect(const User &I) {

SmallVector<SDValue, 4> Values(NumValues);		SmallVector<SDValue, 4> Values(NumValues);
SDValue Cond = getValue(I.getOperand(0));		SDValue Cond = getValue(I.getOperand(0));
SDValue LHSVal = getValue(I.getOperand(1));		SDValue LHSVal = getValue(I.getOperand(1));
SDValue RHSVal = getValue(I.getOperand(2));		SDValue RHSVal = getValue(I.getOperand(2));
auto BaseOps = {Cond};		auto BaseOps = {Cond};
ISD::NodeType OpCode = Cond.getValueType().isVector() ?		ISD::NodeType OpCode = Cond.getValueType().isVector() ?
ISD::VSELECT : ISD::SELECT;		ISD::VSELECT : ISD::SELECT;
		SDNodeFlags Flags;

// Min/max matching is only viable if all output VTs are the same.		// Min/max matching is only viable if all output VTs are the same.
if (is_splat(ValueVTs)) {		if (is_splat(ValueVTs)) {
EVT VT = ValueVTs[0];		EVT VT = ValueVTs[0];
LLVMContext &Ctx = *DAG.getContext();		LLVMContext &Ctx = *DAG.getContext();
auto &TLI = DAG.getTargetLoweringInfo();		auto &TLI = DAG.getTargetLoweringInfo();

// We care about the legality of the operation after it has been type		// We care about the legality of the operation after it has been type
// legalized.		// legalized.
while (TLI.getTypeAction(Ctx, VT) != TargetLoweringBase::TypeLegal &&		while (TLI.getTypeAction(Ctx, VT) != TargetLoweringBase::TypeLegal &&
VT != TLI.getTypeToTransformTo(Ctx, VT))		VT != TLI.getTypeToTransformTo(Ctx, VT))
VT = TLI.getTypeToTransformTo(Ctx, VT);		VT = TLI.getTypeToTransformTo(Ctx, VT);

// If the vselect is legal, assume we want to leave this as a vector setcc +		// If the vselect is legal, assume we want to leave this as a vector setcc +
// vselect. Otherwise, if this is going to be scalarized, we want to see if		// vselect. Otherwise, if this is going to be scalarized, we want to see if
// min/max is legal on the scalar type.		// min/max is legal on the scalar type.
bool UseScalarMinMax = VT.isVector() &&		bool UseScalarMinMax = VT.isVector() &&
!TLI.isOperationLegalOrCustom(ISD::VSELECT, VT);		!TLI.isOperationLegalOrCustom(ISD::VSELECT, VT);

Value LHS, RHS;		Value LHS, RHS;
auto SPR = matchSelectPattern(const_cast<User*>(&I), LHS, RHS);		auto SPR = matchSelectPattern(const_cast<User*>(&I), LHS, RHS);
		Flags.copyFMF(SPR.FMF);

ISD::NodeType Opc = ISD::DELETED_NODE;		ISD::NodeType Opc = ISD::DELETED_NODE;
switch (SPR.Flavor) {		switch (SPR.Flavor) {
case SPF_UMAX: Opc = ISD::UMAX; break;		case SPF_UMAX: Opc = ISD::UMAX; break;
case SPF_UMIN: Opc = ISD::UMIN; break;		case SPF_UMIN: Opc = ISD::UMIN; break;
case SPF_SMAX: Opc = ISD::SMAX; break;		case SPF_SMAX: Opc = ISD::SMAX; break;
case SPF_SMIN: Opc = ISD::SMIN; break;		case SPF_SMIN: Opc = ISD::SMIN; break;
case SPF_FMINNUM:		case SPF_FMINNUM:
switch (SPR.NaNBehavior) {		switch (SPR.NaNBehavior) {
▲ Show 20 Lines • Show All 48 Lines • ▼ Show 20 Lines	void SelectionDAGBuilder::visitSelect(const User &I) {
}		}

for (unsigned i = 0; i != NumValues; ++i) {		for (unsigned i = 0; i != NumValues; ++i) {
SmallVector<SDValue, 3> Ops(BaseOps.begin(), BaseOps.end());		SmallVector<SDValue, 3> Ops(BaseOps.begin(), BaseOps.end());
Ops.push_back(SDValue(LHSVal.getNode(), LHSVal.getResNo() + i));		Ops.push_back(SDValue(LHSVal.getNode(), LHSVal.getResNo() + i));
Ops.push_back(SDValue(RHSVal.getNode(), RHSVal.getResNo() + i));		Ops.push_back(SDValue(RHSVal.getNode(), RHSVal.getResNo() + i));
Values[i] = DAG.getNode(OpCode, getCurSDLoc(),		Values[i] = DAG.getNode(OpCode, getCurSDLoc(),
LHSVal.getNode()->getValueType(LHSVal.getResNo()+i),		LHSVal.getNode()->getValueType(LHSVal.getResNo()+i),
Ops);		Ops, Flags);
}		}

setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(),		setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(),
DAG.getVTList(ValueVTs), Values));		DAG.getVTList(ValueVTs), Values));
}		}

void SelectionDAGBuilder::visitTrunc(const User &I) {		void SelectionDAGBuilder::visitTrunc(const User &I) {
// TruncInst cannot be a no-op cast because sizeof(src) > sizeof(dest).		// TruncInst cannot be a no-op cast because sizeof(src) > sizeof(dest).
▲ Show 20 Lines • Show All 7,312 Lines • Show Last 20 Lines