Skip to content

Commit f466001

Browse files
author
Andrew Kaylor
committed May 25, 2017
Add constrained intrinsics for some libm-equivalent operations
Differential revision: https://reviews.llvm.org/D32319 llvm-svn: 303922
1 parent 1527baa commit f466001

16 files changed

+1134
-72
lines changed
 

‎llvm/docs/LangRef.rst

+460-5
Large diffs are not rendered by default.

‎llvm/include/llvm/CodeGen/ISDOpcodes.h

+8
Original file line numberDiff line numberDiff line change
@@ -264,6 +264,14 @@ namespace ISD {
264264
/// optimized.
265265
STRICT_FADD, STRICT_FSUB, STRICT_FMUL, STRICT_FDIV, STRICT_FREM,
266266

267+
/// Constrained versions of libm-equivalent floating point intrinsics.
268+
/// These will be lowered to the equivalent non-constrained pseudo-op
269+
/// (or expanded to the equivalent library call) before final selection.
270+
/// They are used to limit optimizations while the DAG is being optimized.
271+
STRICT_FSQRT, STRICT_FPOW, STRICT_FPOWI, STRICT_FSIN, STRICT_FCOS,
272+
STRICT_FEXP, STRICT_FEXP2, STRICT_FLOG, STRICT_FLOG10, STRICT_FLOG2,
273+
STRICT_FRINT, STRICT_FNEARBYINT,
274+
267275
/// FMA - Perform a * b + c with no intermediate rounding step.
268276
FMA,
269277

‎llvm/include/llvm/CodeGen/SelectionDAG.h

+5
Original file line numberDiff line numberDiff line change
@@ -1070,6 +1070,11 @@ class SelectionDAG {
10701070
SDNode *MorphNodeTo(SDNode *N, unsigned Opc, SDVTList VTs,
10711071
ArrayRef<SDValue> Ops);
10721072

1073+
/// Mutate the specified strict FP node to its non-strict equivalent,
1074+
/// unlinking the node from its chain and dropping the metadata arguments.
1075+
/// The node must be a strict FP node.
1076+
SDNode *mutateStrictFPToFP(SDNode *Node);
1077+
10731078
/// These are used for target selectors to create a new node
10741079
/// with specified return type(s), MachineInstr opcode, and operands.
10751080
///

‎llvm/include/llvm/CodeGen/SelectionDAGNodes.h

+26
Original file line numberDiff line numberDiff line change
@@ -612,6 +612,32 @@ class SDNode : public FoldingSetNode, public ilist_node<SDNode> {
612612
SDNodeBits.IsMemIntrinsic;
613613
}
614614

615+
/// Test if this node is a strict floating point pseudo-op.
/// Returns true when NodeType is one of the ISD::STRICT_* opcodes listed in
/// the switch below and false for every other opcode.
/// The query only reads NodeType, so it is const-qualified (like
/// isMachineOpcode()) and can be called through a const SDNode pointer.
616+
bool isStrictFPOpcode() const {
617+
switch (NodeType) {
618+
default:
619+
return false;
620+
case ISD::STRICT_FADD:
621+
case ISD::STRICT_FSUB:
622+
case ISD::STRICT_FMUL:
623+
case ISD::STRICT_FDIV:
624+
case ISD::STRICT_FREM:
625+
case ISD::STRICT_FSQRT:
626+
case ISD::STRICT_FPOW:
627+
case ISD::STRICT_FPOWI:
628+
case ISD::STRICT_FSIN:
629+
case ISD::STRICT_FCOS:
630+
case ISD::STRICT_FEXP:
631+
case ISD::STRICT_FEXP2:
632+
case ISD::STRICT_FLOG:
633+
case ISD::STRICT_FLOG10:
634+
case ISD::STRICT_FLOG2:
635+
case ISD::STRICT_FRINT:
636+
case ISD::STRICT_FNEARBYINT:
637+
return true;
638+
}
639+
}
640+
615641
/// Test if this node has a post-isel opcode, directly
616642
/// corresponding to a MachineInstr opcode.
617643
bool isMachineOpcode() const { return NodeType < 0; }

‎llvm/include/llvm/IR/IntrinsicInst.h

+13
Original file line numberDiff line numberDiff line change
@@ -171,6 +171,7 @@ namespace llvm {
171171
ebStrict
172172
};
173173

174+
bool isUnaryOp() const;
174175
RoundingMode getRoundingMode() const;
175176
ExceptionBehavior getExceptionBehavior() const;
176177

@@ -182,6 +183,18 @@ namespace llvm {
182183
case Intrinsic::experimental_constrained_fmul:
183184
case Intrinsic::experimental_constrained_fdiv:
184185
case Intrinsic::experimental_constrained_frem:
186+
case Intrinsic::experimental_constrained_sqrt:
187+
case Intrinsic::experimental_constrained_pow:
188+
case Intrinsic::experimental_constrained_powi:
189+
case Intrinsic::experimental_constrained_sin:
190+
case Intrinsic::experimental_constrained_cos:
191+
case Intrinsic::experimental_constrained_exp:
192+
case Intrinsic::experimental_constrained_exp2:
193+
case Intrinsic::experimental_constrained_log:
194+
case Intrinsic::experimental_constrained_log10:
195+
case Intrinsic::experimental_constrained_log2:
196+
case Intrinsic::experimental_constrained_rint:
197+
case Intrinsic::experimental_constrained_nearbyint:
185198
return true;
186199
default: return false;
187200
}

‎llvm/include/llvm/IR/Intrinsics.td

+57-1
Original file line numberDiff line numberDiff line change
@@ -489,8 +489,64 @@ let IntrProperties = [IntrInaccessibleMemOnly] in {
489489
LLVMMatchType<0>,
490490
llvm_metadata_ty,
491491
llvm_metadata_ty ]>;
492+
493+
// These intrinsics are sensitive to the rounding mode so we need constrained
494+
// versions of each of them. When strict rounding and exception control are
495+
// not required the non-constrained versions of these intrinsics should be
496+
// used.
497+
// Operand layout shared by the definitions below: a single overloaded
// floating-point result (llvm_anyfloat_ty), a first operand of the same type
// as the result (LLVMMatchType<0>), and two trailing metadata operands.
// NOTE(review): the metadata operands are presumably the rounding mode and
// exception behavior -- confirm against the LangRef entry for each intrinsic.
def int_experimental_constrained_sqrt : Intrinsic<[ llvm_anyfloat_ty ],
498+
[ LLVMMatchType<0>,
499+
llvm_metadata_ty,
500+
llvm_metadata_ty ]>;
501+
// powi takes an additional i32 operand after the floating-point operand.
def int_experimental_constrained_powi : Intrinsic<[ llvm_anyfloat_ty ],
502+
[ LLVMMatchType<0>,
503+
llvm_i32_ty,
504+
llvm_metadata_ty,
505+
llvm_metadata_ty ]>;
506+
def int_experimental_constrained_sin : Intrinsic<[ llvm_anyfloat_ty ],
507+
[ LLVMMatchType<0>,
508+
llvm_metadata_ty,
509+
llvm_metadata_ty ]>;
510+
def int_experimental_constrained_cos : Intrinsic<[ llvm_anyfloat_ty ],
511+
[ LLVMMatchType<0>,
512+
llvm_metadata_ty,
513+
llvm_metadata_ty ]>;
514+
// pow takes two floating-point operands, both of the result type.
def int_experimental_constrained_pow : Intrinsic<[ llvm_anyfloat_ty ],
515+
[ LLVMMatchType<0>,
516+
LLVMMatchType<0>,
517+
llvm_metadata_ty,
518+
llvm_metadata_ty ]>;
519+
def int_experimental_constrained_log : Intrinsic<[ llvm_anyfloat_ty ],
520+
[ LLVMMatchType<0>,
521+
llvm_metadata_ty,
522+
llvm_metadata_ty ]>;
523+
def int_experimental_constrained_log10: Intrinsic<[ llvm_anyfloat_ty ],
524+
[ LLVMMatchType<0>,
525+
llvm_metadata_ty,
526+
llvm_metadata_ty ]>;
527+
def int_experimental_constrained_log2 : Intrinsic<[ llvm_anyfloat_ty ],
528+
[ LLVMMatchType<0>,
529+
llvm_metadata_ty,
530+
llvm_metadata_ty ]>;
531+
def int_experimental_constrained_exp : Intrinsic<[ llvm_anyfloat_ty ],
532+
[ LLVMMatchType<0>,
533+
llvm_metadata_ty,
534+
llvm_metadata_ty ]>;
535+
def int_experimental_constrained_exp2 : Intrinsic<[ llvm_anyfloat_ty ],
536+
[ LLVMMatchType<0>,
537+
llvm_metadata_ty,
538+
llvm_metadata_ty ]>;
539+
def int_experimental_constrained_rint : Intrinsic<[ llvm_anyfloat_ty ],
540+
[ LLVMMatchType<0>,
541+
llvm_metadata_ty,
542+
llvm_metadata_ty ]>;
543+
def int_experimental_constrained_nearbyint : Intrinsic<[ llvm_anyfloat_ty ],
544+
[ LLVMMatchType<0>,
545+
llvm_metadata_ty,
546+
llvm_metadata_ty ]>;
492547
}
493-
// FIXME: Add intrinsic for fcmp, fptrunc, fpext, fptoui and fptosi.
548+
// FIXME: Add intrinsics for fcmp, fptrunc, fpext, fptoui and fptosi.
549+
// FIXME: Add intrinsics for fabs, copysign, floor, ceil, trunc and round?
494550

495551

496552
//===------------------------- Expect Intrinsics --------------------------===//

‎llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp

+67
Original file line numberDiff line numberDiff line change
@@ -899,6 +899,39 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) {
899899
}
900900
}
901901

902+
/// Compute the legalize action for a strict FP pseudo-op by querying the
/// target's action for the equivalent non-strict opcode.
///
/// \param TLI    Target lowering info that is asked for the operation action.
/// \param Opcode One of the ISD::STRICT_* libm-equivalent opcodes handled in
///               the switch below; any other opcode (including STRICT_FADD
///               through STRICT_FREM) reaches llvm_unreachable.
/// \param VT     Value type passed to getOperationAction.
/// \returns TargetLowering::Legal or TargetLowering::Expand; every other
///          action reported by the target is folded into Expand, and
///          STRICT_FPOWI always yields Expand.
static TargetLowering::LegalizeAction
903+
getStrictFPOpcodeAction(const TargetLowering &TLI, unsigned Opcode, EVT VT) {
904+
unsigned EqOpc;
905+
// Map the strict opcode to its non-strict equivalent.
switch (Opcode) {
906+
default: llvm_unreachable("Unexpected FP pseudo-opcode");
907+
case ISD::STRICT_FSQRT: EqOpc = ISD::FSQRT; break;
908+
case ISD::STRICT_FPOW: EqOpc = ISD::FPOW; break;
909+
case ISD::STRICT_FPOWI: EqOpc = ISD::FPOWI; break;
910+
case ISD::STRICT_FSIN: EqOpc = ISD::FSIN; break;
911+
case ISD::STRICT_FCOS: EqOpc = ISD::FCOS; break;
912+
case ISD::STRICT_FEXP: EqOpc = ISD::FEXP; break;
913+
case ISD::STRICT_FEXP2: EqOpc = ISD::FEXP2; break;
914+
case ISD::STRICT_FLOG: EqOpc = ISD::FLOG; break;
915+
case ISD::STRICT_FLOG10: EqOpc = ISD::FLOG10; break;
916+
case ISD::STRICT_FLOG2: EqOpc = ISD::FLOG2; break;
917+
case ISD::STRICT_FRINT: EqOpc = ISD::FRINT; break;
918+
case ISD::STRICT_FNEARBYINT: EqOpc = ISD::FNEARBYINT; break;
919+
}
920+
921+
auto Action = TLI.getOperationAction(EqOpc, VT);
922+
923+
// We don't currently handle Custom or Promote for strict FP pseudo-ops.
924+
// For now, we just expand for those cases.
925+
if (Action != TargetLowering::Legal)
926+
Action = TargetLowering::Expand;
927+
928+
// getOperationAction reports ISD::FPOWI as 'Legal' even though it should be
// expanded, so force Expand for the strict variant here.
929+
if (Opcode == ISD::STRICT_FPOWI && Action == TargetLowering::Legal)
930+
Action = TargetLowering::Expand;
931+
932+
return Action;
933+
}
934+
902935
/// Return a legal replacement for the given operation, with all legal operands.
903936
void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
904937
DEBUG(dbgs() << "\nLegalizing: "; Node->dump(&DAG));
@@ -1043,6 +1076,25 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
10431076
return;
10441077
}
10451078
break;
1079+
case ISD::STRICT_FSQRT:
1080+
case ISD::STRICT_FPOW:
1081+
case ISD::STRICT_FPOWI:
1082+
case ISD::STRICT_FSIN:
1083+
case ISD::STRICT_FCOS:
1084+
case ISD::STRICT_FEXP:
1085+
case ISD::STRICT_FEXP2:
1086+
case ISD::STRICT_FLOG:
1087+
case ISD::STRICT_FLOG10:
1088+
case ISD::STRICT_FLOG2:
1089+
case ISD::STRICT_FRINT:
1090+
case ISD::STRICT_FNEARBYINT:
1091+
// These pseudo-ops get legalized as if they were their non-strict
1092+
// equivalent. For instance, if ISD::FSQRT is legal then ISD::STRICT_FSQRT
1093+
// is also legal, but if ISD::FSQRT requires expansion then so does
1094+
// ISD::STRICT_FSQRT.
1095+
Action = getStrictFPOpcodeAction(TLI, Node->getOpcode(),
1096+
Node->getValueType(0));
1097+
break;
10461098

10471099
default:
10481100
if (Node->getOpcode() >= ISD::BUILTIN_OP_END) {
@@ -2032,6 +2084,9 @@ SDValue SelectionDAGLegalize::ExpandFPLibCall(SDNode* Node,
20322084
RTLIB::Libcall Call_F80,
20332085
RTLIB::Libcall Call_F128,
20342086
RTLIB::Libcall Call_PPCF128) {
2087+
if (Node->isStrictFPOpcode())
2088+
Node = DAG.mutateStrictFPToFP(Node);
2089+
20352090
RTLIB::Libcall LC;
20362091
switch (Node->getSimpleValueType(0).SimpleTy) {
20372092
default: llvm_unreachable("Unexpected request for libcall!");
@@ -3907,16 +3962,19 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
39073962
RTLIB::FMAX_PPCF128));
39083963
break;
39093964
case ISD::FSQRT:
3965+
case ISD::STRICT_FSQRT:
39103966
Results.push_back(ExpandFPLibCall(Node, RTLIB::SQRT_F32, RTLIB::SQRT_F64,
39113967
RTLIB::SQRT_F80, RTLIB::SQRT_F128,
39123968
RTLIB::SQRT_PPCF128));
39133969
break;
39143970
case ISD::FSIN:
3971+
case ISD::STRICT_FSIN:
39153972
Results.push_back(ExpandFPLibCall(Node, RTLIB::SIN_F32, RTLIB::SIN_F64,
39163973
RTLIB::SIN_F80, RTLIB::SIN_F128,
39173974
RTLIB::SIN_PPCF128));
39183975
break;
39193976
case ISD::FCOS:
3977+
case ISD::STRICT_FCOS:
39203978
Results.push_back(ExpandFPLibCall(Node, RTLIB::COS_F32, RTLIB::COS_F64,
39213979
RTLIB::COS_F80, RTLIB::COS_F128,
39223980
RTLIB::COS_PPCF128));
@@ -3926,26 +3984,31 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
39263984
ExpandSinCosLibCall(Node, Results);
39273985
break;
39283986
case ISD::FLOG:
3987+
case ISD::STRICT_FLOG:
39293988
Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG_F32, RTLIB::LOG_F64,
39303989
RTLIB::LOG_F80, RTLIB::LOG_F128,
39313990
RTLIB::LOG_PPCF128));
39323991
break;
39333992
case ISD::FLOG2:
3993+
case ISD::STRICT_FLOG2:
39343994
Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG2_F32, RTLIB::LOG2_F64,
39353995
RTLIB::LOG2_F80, RTLIB::LOG2_F128,
39363996
RTLIB::LOG2_PPCF128));
39373997
break;
39383998
case ISD::FLOG10:
3999+
case ISD::STRICT_FLOG10:
39394000
Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG10_F32, RTLIB::LOG10_F64,
39404001
RTLIB::LOG10_F80, RTLIB::LOG10_F128,
39414002
RTLIB::LOG10_PPCF128));
39424003
break;
39434004
case ISD::FEXP:
4005+
case ISD::STRICT_FEXP:
39444006
Results.push_back(ExpandFPLibCall(Node, RTLIB::EXP_F32, RTLIB::EXP_F64,
39454007
RTLIB::EXP_F80, RTLIB::EXP_F128,
39464008
RTLIB::EXP_PPCF128));
39474009
break;
39484010
case ISD::FEXP2:
4011+
case ISD::STRICT_FEXP2:
39494012
Results.push_back(ExpandFPLibCall(Node, RTLIB::EXP2_F32, RTLIB::EXP2_F64,
39504013
RTLIB::EXP2_F80, RTLIB::EXP2_F128,
39514014
RTLIB::EXP2_PPCF128));
@@ -3966,11 +4029,13 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
39664029
RTLIB::CEIL_PPCF128));
39674030
break;
39684031
case ISD::FRINT:
4032+
case ISD::STRICT_FRINT:
39694033
Results.push_back(ExpandFPLibCall(Node, RTLIB::RINT_F32, RTLIB::RINT_F64,
39704034
RTLIB::RINT_F80, RTLIB::RINT_F128,
39714035
RTLIB::RINT_PPCF128));
39724036
break;
39734037
case ISD::FNEARBYINT:
4038+
case ISD::STRICT_FNEARBYINT:
39744039
Results.push_back(ExpandFPLibCall(Node, RTLIB::NEARBYINT_F32,
39754040
RTLIB::NEARBYINT_F64,
39764041
RTLIB::NEARBYINT_F80,
@@ -3985,11 +4050,13 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
39854050
RTLIB::ROUND_PPCF128));
39864051
break;
39874052
case ISD::FPOWI:
4053+
case ISD::STRICT_FPOWI:
39884054
Results.push_back(ExpandFPLibCall(Node, RTLIB::POWI_F32, RTLIB::POWI_F64,
39894055
RTLIB::POWI_F80, RTLIB::POWI_F128,
39904056
RTLIB::POWI_PPCF128));
39914057
break;
39924058
case ISD::FPOW:
4059+
case ISD::STRICT_FPOW:
39934060
Results.push_back(ExpandFPLibCall(Node, RTLIB::POW_F32, RTLIB::POW_F64,
39944061
RTLIB::POW_F80, RTLIB::POW_F128,
39954062
RTLIB::POW_PPCF128));

‎llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp

+57
Original file line numberDiff line numberDiff line change
@@ -6542,6 +6542,63 @@ SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc,
65426542
return N;
65436543
}
65446544

6545+
/// Convert a strict FP node into the equivalent non-strict FP node.
///
/// The strict node's chain result (value 1) is unlinked by redirecting all of
/// its uses to the node's input chain (operand 0). The node is then morphed to
/// the non-strict opcode, keeping only operand 1 (unary ops) or operands 1
/// and 2 (all other ops) and producing a single result whose type is the type
/// of operand 1.
///
/// \param Node A node with one of the ISD::STRICT_* opcodes handled in the
///             switch below; any other opcode reaches llvm_unreachable.
/// \returns The node returned by MorphNodeTo: either \p Node itself, updated
///          in place, or a different node, in which case all uses of \p Node
///          have been redirected to it and \p Node has been removed.
SDNode* SelectionDAG::mutateStrictFPToFP(SDNode *Node) {
6546+
unsigned OrigOpc = Node->getOpcode();
6547+
unsigned NewOpc;
6548+
// Set for opcodes whose non-strict form takes a single operand.
bool IsUnary = false;
6549+
switch (OrigOpc) {
6550+
default:
6551+
llvm_unreachable("mutateStrictFPToFP called with unexpected opcode!");
6552+
case ISD::STRICT_FADD: NewOpc = ISD::FADD; break;
6553+
case ISD::STRICT_FSUB: NewOpc = ISD::FSUB; break;
6554+
case ISD::STRICT_FMUL: NewOpc = ISD::FMUL; break;
6555+
case ISD::STRICT_FDIV: NewOpc = ISD::FDIV; break;
6556+
case ISD::STRICT_FREM: NewOpc = ISD::FREM; break;
6557+
case ISD::STRICT_FSQRT: NewOpc = ISD::FSQRT; IsUnary = true; break;
6558+
case ISD::STRICT_FPOW: NewOpc = ISD::FPOW; break;
6559+
case ISD::STRICT_FPOWI: NewOpc = ISD::FPOWI; break;
6560+
case ISD::STRICT_FSIN: NewOpc = ISD::FSIN; IsUnary = true; break;
6561+
case ISD::STRICT_FCOS: NewOpc = ISD::FCOS; IsUnary = true; break;
6562+
case ISD::STRICT_FEXP: NewOpc = ISD::FEXP; IsUnary = true; break;
6563+
case ISD::STRICT_FEXP2: NewOpc = ISD::FEXP2; IsUnary = true; break;
6564+
case ISD::STRICT_FLOG: NewOpc = ISD::FLOG; IsUnary = true; break;
6565+
case ISD::STRICT_FLOG10: NewOpc = ISD::FLOG10; IsUnary = true; break;
6566+
case ISD::STRICT_FLOG2: NewOpc = ISD::FLOG2; IsUnary = true; break;
6567+
case ISD::STRICT_FRINT: NewOpc = ISD::FRINT; IsUnary = true; break;
6568+
case ISD::STRICT_FNEARBYINT:
6569+
NewOpc = ISD::FNEARBYINT;
6570+
IsUnary = true;
6571+
break;
6572+
}
6573+
6574+
// We're taking this node out of the chain, so we need to re-link things.
// Operand 0 is the incoming chain and result 1 is the outgoing chain; users
// of the outgoing chain are pointed at the incoming chain instead.
6575+
SDValue InputChain = Node->getOperand(0);
6576+
SDValue OutputChain = SDValue(Node, 1);
6577+
ReplaceAllUsesOfValueWith(OutputChain, InputChain);
6578+
6579+
// The morphed node has a single result with the type of operand 1, and the
// chain operand (operand 0) is not carried over.
SDVTList VTs = getVTList(Node->getOperand(1).getValueType());
6580+
SDNode *Res = nullptr;
6581+
if (IsUnary)
6582+
Res = MorphNodeTo(Node, NewOpc, VTs, { Node->getOperand(1) });
6583+
else
6584+
Res = MorphNodeTo(Node, NewOpc, VTs, { Node->getOperand(1),
6585+
Node->getOperand(2) });
6586+
6587+
// MorphNodeTo can operate in two ways: if an existing node with the
6588+
// specified operands exists, it can just return it. Otherwise, it
6589+
// updates the node in place to have the requested operands.
6590+
if (Res == Node) {
6591+
// If we updated the node in place, reset the node ID to -1.
6592+
// NOTE(review): presumably so later passes treat it like a newly created
// node -- it is a regular ISD::F* node here, not a machine node.
6593+
Res->setNodeId(-1);
6594+
} else {
6595+
// An equivalent node already existed: forward remaining uses to it and
// delete the now-unused strict node.
ReplaceAllUsesWith(Node, Res);
6596+
RemoveDeadNode(Node);
6597+
}
6598+
6599+
return Res;
6600+
}
6601+
65456602

65466603
/// getMachineNode - These are used for target selectors to create a new node
65476604
/// with specified return type(s), MachineInstr opcode, and operands.

0 commit comments

Comments
 (0)
Please sign in to comment.