Index: include/llvm/CodeGen/SelectionDAG.h
===================================================================
--- include/llvm/CodeGen/SelectionDAG.h
+++ include/llvm/CodeGen/SelectionDAG.h
@@ -651,7 +651,7 @@
   SDValue getNode(unsigned Opcode, SDLoc DL, EVT VT);
   SDValue getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N);
   SDValue getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1, SDValue N2,
-                  bool nuw = false, bool nsw = false, bool exact = false);
+                  const SDNodeFlags *Flags = nullptr);
   SDValue getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1, SDValue N2,
                   SDValue N3);
   SDValue getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1, SDValue N2,
@@ -970,8 +970,7 @@
 
   /// Get the specified node if it's already available, or else return NULL.
   SDNode *getNodeIfExists(unsigned Opcode, SDVTList VTs, ArrayRef<SDValue> Ops,
-                          bool nuw = false, bool nsw = false,
-                          bool exact = false);
+                          const SDNodeFlags *Flags = nullptr);
 
   /// Creates a SDDbgValue node.
   SDDbgValue *getDbgValue(MDNode *Var, MDNode *Expr, SDNode *N, unsigned R,
@@ -1228,9 +1227,8 @@
 
   void allnodes_clear();
 
-  BinarySDNode *GetBinarySDNode(unsigned Opcode, SDLoc DL, SDVTList VTs,
-                                SDValue N1, SDValue N2, bool nuw, bool nsw,
-                                bool exact);
+  SDNode *GetSDNodeWithFlags(unsigned Opcode, SDLoc DL, SDVTList VTs,
+                             ArrayRef<SDValue> Ops, const SDNodeFlags *Flags);
 
   /// List of non-single value types.
   FoldingSet<SDVTListNode> VTListMap;
Index: include/llvm/CodeGen/SelectionDAGNodes.h
===================================================================
--- include/llvm/CodeGen/SelectionDAGNodes.h
+++ include/llvm/CodeGen/SelectionDAGNodes.h
@@ -926,6 +926,104 @@
   if (N) N->addUse(*this);
 }
 
+/// Returns true if the opcode is an operation with optional optimization flags.
+inline bool mayHaveOptimizationFlags(unsigned Opcode) {
+  switch (Opcode) {
+  case ISD::SDIV:
+  case ISD::UDIV:
+  case ISD::SRA:
+  case ISD::SRL:
+  case ISD::MUL:
+  case ISD::ADD:
+  case ISD::SUB:
+  case ISD::SHL:
+  case ISD::FADD:
+  case ISD::FDIV:
+  case ISD::FMUL:
+  case ISD::FREM:
+  case ISD::FSUB:
+    return true;
+  default:
+    return false;
+  }
+}
+
+/// These are IR-level optimization flags that may be propagated to SDNodes:
+/// the integer wrap/exact flags and the floating-point fast-math flags.
+/// All flags are clear on construction.
+struct SDNodeFlags {
+private:
+  bool NoUnsignedWrap : 1;
+  bool NoSignedWrap : 1;
+  bool Exact : 1;
+  bool UnsafeAlgebra : 1;
+  bool NoNaNs : 1;
+  bool NoInfs : 1;
+  bool NoSignedZeros : 1;
+  bool AllowReciprocal : 1;
+
+public:
+  SDNodeFlags() {
+    NoUnsignedWrap = false;
+    NoSignedWrap = false;
+    Exact = false;
+    UnsafeAlgebra = false;
+    NoNaNs = false;
+    NoInfs = false;
+    NoSignedZeros = false;
+    AllowReciprocal = false;
+  }
+
+  void setNoUnsignedWrap(bool b) { NoUnsignedWrap = b; }
+  void setNoSignedWrap(bool b) { NoSignedWrap = b; }
+  void setExact(bool b) { Exact = b; }
+  void setUnsafeAlgebra(bool b) { UnsafeAlgebra = b; }
+  void setNoNaNs(bool b) { NoNaNs = b; }
+  void setNoInfs(bool b) { NoInfs = b; }
+  void setNoSignedZeros(bool b) { NoSignedZeros = b; }
+  void setAllowReciprocal(bool b) { AllowReciprocal = b; }
+
+  bool hasNoUnsignedWrap() const { return NoUnsignedWrap; }
+  bool hasNoSignedWrap() const { return NoSignedWrap; }
+  bool hasExact() const { return Exact; }
+  bool hasUnsafeAlgebra() const { return UnsafeAlgebra; }
+  bool hasNoNaNs() const { return NoNaNs; }
+  bool hasNoInfs() const { return NoInfs; }
+  bool hasNoSignedZeros() const { return NoSignedZeros; }
+  bool hasAllowReciprocal() const { return AllowReciprocal; }
+
+  /// Return a raw encoding of the flags.
+  /// This function should only be used to add data to the NodeID value.
+  unsigned getRawFlags() const {
+    return (NoUnsignedWrap << 0) |
+           (NoSignedWrap << 1) |
+           (Exact << 2) |
+           (UnsafeAlgebra << 3) |
+           (NoNaNs << 4) |
+           (NoInfs << 5) |
+           (NoSignedZeros << 6) |
+           (AllowReciprocal << 7);
+  }
+};
+
+/// This class is an extension of SDNode used for operations that may carry
+/// optional optimization flags (see mayHaveOptimizationFlags).
+class SDNodeWithFlags : public SDNode {
+public:
+  /// Optimization flags attached to this node.
+  SDNodeFlags Flags;
+  SDNodeWithFlags(unsigned Opc, unsigned Order, DebugLoc dl, SDVTList VTs,
+                  ArrayRef<SDValue> Ops, SDNodeFlags NodeFlags)
+      : SDNode(Opc, Order, dl, VTs, Ops), Flags(NodeFlags) {}
+
+  /// Support for isa/cast/dyn_cast. This keys on the opcode alone, so every
+  /// node with a flag-capable opcode must be allocated as an SDNodeWithFlags.
+  static bool classof(const SDNode *N) {
+    return mayHaveOptimizationFlags(N->getOpcode());
+  }
+};
+
+
 /// This class is used for single-operand SDNodes. This is solely
 /// to allow co-allocation of node operands with the node itself.
 class UnarySDNode : public SDNode {
@@ -950,52 +1048,6 @@
   }
 };
 
-/// Returns true if the opcode is a binary operation with flags.
-static bool isBinOpWithFlags(unsigned Opcode) {
-  switch (Opcode) {
-  case ISD::SDIV:
-  case ISD::UDIV:
-  case ISD::SRA:
-  case ISD::SRL:
-  case ISD::MUL:
-  case ISD::ADD:
-  case ISD::SUB:
-  case ISD::SHL:
-    return true;
-  default:
-    return false;
-  }
-}
-
-/// This class is an extension of BinarySDNode
-/// used from those opcodes that have associated extra flags.
-class BinaryWithFlagsSDNode : public BinarySDNode {
-  enum { NUW = (1 << 0), NSW = (1 << 1), EXACT = (1 << 2) };
-
-public:
-  BinaryWithFlagsSDNode(unsigned Opc, unsigned Order, DebugLoc dl, SDVTList VTs,
-                        SDValue X, SDValue Y)
-      : BinarySDNode(Opc, Order, dl, VTs, X, Y) {}
-  /// Return the SubclassData value, which contains an encoding of the flags.
-  /// This function should be used to add subclass data to the NodeID value.
-  unsigned getRawSubclassData() const { return SubclassData; }
-  void setHasNoUnsignedWrap(bool b) {
-    SubclassData = (SubclassData & ~NUW) | (b ? NUW : 0);
-  }
-  void setHasNoSignedWrap(bool b) {
-    SubclassData = (SubclassData & ~NSW) | (b ? NSW : 0);
-  }
-  void setIsExact(bool b) {
-    SubclassData = (SubclassData & ~EXACT) | (b ? EXACT : 0);
-  }
-  bool hasNoUnsignedWrap() const { return SubclassData & NUW; }
-  bool hasNoSignedWrap() const { return SubclassData & NSW; }
-  bool isExact() const { return SubclassData & EXACT; }
-  static bool classof(const SDNode *N) {
-    return isBinOpWithFlags(N->getOpcode());
-  }
-};
-
 /// This class is used for three-operand SDNodes. This is solely
 /// to allow co-allocation of node operands with the node itself.
 class TernarySDNode : public SDNode {
Index: lib/CodeGen/SelectionDAG/DAGCombiner.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -1443,11 +1443,9 @@
     if (isa<ConstantSDNode>(N0) || !isa<ConstantSDNode>(N1)) {
       SDValue Ops[] = {N1, N0};
       SDNode *CSENode;
-      if (const BinaryWithFlagsSDNode *BinNode =
-              dyn_cast<BinaryWithFlagsSDNode>(N)) {
-        CSENode = DAG.getNodeIfExists(
-            N->getOpcode(), N->getVTList(), Ops, BinNode->hasNoUnsignedWrap(),
-            BinNode->hasNoSignedWrap(), BinNode->isExact());
+      if (const auto *FlagsNode = dyn_cast<SDNodeWithFlags>(N)) {
+        CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(),
+                                      Ops, &FlagsNode->Flags);
       } else {
         CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), Ops);
       }
@@ -7803,6 +7801,13 @@
     }
   }
 
+  // NOTE(review): only N's own 'arcp' flag is consulted here; confirm that the
+  // other FDIVs sharing this divisor also allow reciprocals before they are
+  // rewritten by the combine below.
+  bool AllowRecip = false;
+  if (const auto *NodeWithFlags = dyn_cast<SDNodeWithFlags>(N))
+    AllowRecip = NodeWithFlags->Flags.hasAllowReciprocal();
+
   // Combine multiple FDIVs with the same divisor into multiple FMULs by the
   // reciprocal.
   // E.g., (a / D; b / D;) -> (recip = 1.0 / D; a * recip; b * recip)
@@ -7810,7 +7815,7 @@
   // may have different costs for FDIV and FMUL, so sometimes the cost of two
   // FDIVs may be lower than the cost of one FDIV and two FMULs. Another reason
   // is the critical path is increased from "one FDIV" to "one FDIV + one FMUL".
-  if (Options.UnsafeFPMath) {
+  if (Options.UnsafeFPMath || AllowRecip) {
     // Skip if current node is a reciprocal.
     if (N0CFP && N0CFP->isExactlyValue(1.0))
       return SDValue();
Index: lib/CodeGen/SelectionDAG/SelectionDAG.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -400,18 +400,22 @@
   }
 }
 
-static void AddBinaryNodeIDCustom(FoldingSetNodeID &ID, bool nuw, bool nsw,
-                                  bool exact) {
-  ID.AddBoolean(nuw);
-  ID.AddBoolean(nsw);
-  ID.AddBoolean(exact);
+// Add the optional optimization flags (wrap/exact and fast-math) to the ID.
+static void AddNodeIDFlags(FoldingSetNodeID &ID, unsigned Opcode,
+                           const SDNodeFlags *Flags) {
+  if (!Flags || !mayHaveOptimizationFlags(Opcode))
+    return;
+
+  unsigned RawFlags = Flags->getRawFlags();
+  // If no flags are set, do not alter the ID. This saves time and allows
+  // a gradual increase in API usage of the optional optimization flags.
+  if (RawFlags != 0)
+    ID.AddInteger(RawFlags);
 }
 
-/// AddBinaryNodeIDCustom - Add BinarySDNodes special infos
-static void AddBinaryNodeIDCustom(FoldingSetNodeID &ID, unsigned Opcode,
-                                  bool nuw, bool nsw, bool exact) {
-  if (isBinOpWithFlags(Opcode))
-    AddBinaryNodeIDCustom(ID, nuw, nsw, exact);
+static void AddNodeIDFlags(FoldingSetNodeID &ID, const SDNode *N) {
+  if (auto *Node = dyn_cast<SDNodeWithFlags>(N))
+    AddNodeIDFlags(ID, Node->getOpcode(), &Node->Flags);
 }
 
 static void AddNodeIDNode(FoldingSetNodeID &ID, unsigned short OpC,
@@ -506,19 +510,6 @@
     ID.AddInteger(ST->getPointerInfo().getAddrSpace());
     break;
   }
-  case ISD::SDIV:
-  case ISD::UDIV:
-  case ISD::SRA:
-  case ISD::SRL:
-  case ISD::MUL:
-  case ISD::ADD:
-  case ISD::SUB:
-  case ISD::SHL: {
-    const BinaryWithFlagsSDNode *BinNode = cast<BinaryWithFlagsSDNode>(N);
-    AddBinaryNodeIDCustom(ID, N->getOpcode(), BinNode->hasNoUnsignedWrap(),
-                          BinNode->hasNoSignedWrap(), BinNode->isExact());
-    break;
-  }
   case ISD::ATOMIC_CMP_SWAP:
   case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS:
   case ISD::ATOMIC_SWAP:
@@ -562,6 +553,8 @@
   }
   } // end switch (N->getOpcode())
 
+  AddNodeIDFlags(ID, N);
+
   // Target specific memory nodes could also have address spaces to check.
   if (N->isTargetMemoryOpcode())
     ID.AddInteger(cast<MemSDNode>(N)->getPointerInfo().getAddrSpace());
@@ -956,25 +949,23 @@
     DeallocateNode(AllNodes.begin());
 }
 
-BinarySDNode *SelectionDAG::GetBinarySDNode(unsigned Opcode, SDLoc DL,
-                                            SDVTList VTs, SDValue N1,
-                                            SDValue N2, bool nuw, bool nsw,
-                                            bool exact) {
-  if (isBinOpWithFlags(Opcode)) {
-    BinaryWithFlagsSDNode *FN = new (NodeAllocator) BinaryWithFlagsSDNode(
-        Opcode, DL.getIROrder(), DL.getDebugLoc(), VTs, N1, N2);
-    FN->setHasNoUnsignedWrap(nuw);
-    FN->setHasNoSignedWrap(nsw);
-    FN->setIsExact(exact);
-
-    return FN;
+SDNode *SelectionDAG::GetSDNodeWithFlags(unsigned Opcode, SDLoc DL,
+                                         SDVTList VTs, ArrayRef<SDValue> Ops,
+                                         const SDNodeFlags *Flags) {
+  if (mayHaveOptimizationFlags(Opcode)) {
+    // SDNodeWithFlags::classof() keys on the opcode alone, so a flag-capable
+    // opcode must always get an SDNodeWithFlags; fall back to default
+    // (all-clear) flags when the caller passed none.
+    return new (NodeAllocator) SDNodeWithFlags(
+        Opcode, DL.getIROrder(), DL.getDebugLoc(), VTs, Ops,
+        Flags ? *Flags : SDNodeFlags());
   }
 
-  BinarySDNode *N = new (NodeAllocator)
-      BinarySDNode(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTs, N1, N2);
+  SDNode *N = new (NodeAllocator) SDNode(Opcode, DL.getIROrder(),
+                                         DL.getDebugLoc(), VTs, Ops);
   return N;
 }
 
 void SelectionDAG::clear() {
   allnodes_clear();
   OperandAllocator.Reset();
@@ -3172,7 +3163,7 @@
 }
 
 SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1,
-                              SDValue N2, bool nuw, bool nsw, bool exact) {
+                              SDValue N2, const SDNodeFlags *Flags) {
   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
   ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
   switch (Opcode) {
@@ -3618,24 +3609,22 @@
   }
 
   // Memoize this node if possible.
-  BinarySDNode *N;
+  SDNode *N;
   SDVTList VTs = getVTList(VT);
-  const bool BinOpHasFlags = isBinOpWithFlags(Opcode);
+  SDValue Ops[] = {N1, N2};
   if (VT != MVT::Glue) {
-    SDValue Ops[] = {N1, N2};
     FoldingSetNodeID ID;
     AddNodeIDNode(ID, Opcode, VTs, Ops);
-    if (BinOpHasFlags)
-      AddBinaryNodeIDCustom(ID, Opcode, nuw, nsw, exact);
+    AddNodeIDFlags(ID, Opcode, Flags);
     void *IP = nullptr;
     if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
       return SDValue(E, 0);
 
-    N = GetBinarySDNode(Opcode, DL, VTs, N1, N2, nuw, nsw, exact);
+    N = GetSDNodeWithFlags(Opcode, DL, VTs, Ops, Flags);
 
     CSEMap.InsertNode(N, IP);
   } else {
-    N = GetBinarySDNode(Opcode, DL, VTs, N1, N2, nuw, nsw, exact);
+    N = GetSDNodeWithFlags(Opcode, DL, VTs, Ops, Flags);
   }
 
   InsertNode(N);
@@ -5888,13 +5877,12 @@
 /// getNodeIfExists - Get the specified node if it's already available, or
 /// else return NULL.
 SDNode *SelectionDAG::getNodeIfExists(unsigned Opcode, SDVTList VTList,
-                                      ArrayRef<SDValue> Ops, bool nuw, bool nsw,
-                                      bool exact) {
+                                      ArrayRef<SDValue> Ops,
+                                      const SDNodeFlags *Flags) {
   if (VTList.VTs[VTList.NumVTs - 1] != MVT::Glue) {
     FoldingSetNodeID ID;
     AddNodeIDNode(ID, Opcode, VTList, Ops);
-    if (isBinOpWithFlags(Opcode))
-      AddBinaryNodeIDCustom(ID, nuw, nsw, exact);
+    AddNodeIDFlags(ID, Opcode, Flags);
     void *IP = nullptr;
     if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
       return E;
Index: lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -2832,6 +2832,8 @@
   bool nuw = false;
   bool nsw = false;
   bool exact = false;
+  FastMathFlags FMF;
+
   if (const OverflowingBinaryOperator *OFBinOp =
           dyn_cast<const OverflowingBinaryOperator>(&I)) {
     nuw = OFBinOp->hasNoUnsignedWrap();
@@ -2841,8 +2843,20 @@
           dyn_cast<const PossiblyExactOperator>(&I))
     exact = ExactOp->isExact();
 
+  if (const FPMathOperator *FPOp = dyn_cast<const FPMathOperator>(&I))
+    FMF = FPOp->getFastMathFlags();
+
+  SDNodeFlags Flags;
+  Flags.setAllowReciprocal(FMF.allowReciprocal());
+  Flags.setExact(exact);
+  Flags.setNoInfs(FMF.noInfs());
+  Flags.setNoNaNs(FMF.noNaNs());
+  Flags.setNoSignedWrap(nsw);
+  Flags.setNoSignedZeros(FMF.noSignedZeros());
+  Flags.setNoUnsignedWrap(nuw);
+  Flags.setUnsafeAlgebra(FMF.unsafeAlgebra());
   SDValue BinNodeValue = DAG.getNode(OpCode, getCurSDLoc(), Op1.getValueType(),
-                                     Op1, Op2, nuw, nsw, exact);
+                                     Op1, Op2, &Flags);
   setValue(&I, BinNodeValue);
 }
 
@@ -2890,9 +2904,12 @@
             dyn_cast<const PossiblyExactOperator>(&I))
       exact = ExactOp->isExact();
   }
-
+  SDNodeFlags Flags;
+  Flags.setExact(exact);
+  Flags.setNoSignedWrap(nsw);
+  Flags.setNoUnsignedWrap(nuw);
   SDValue Res = DAG.getNode(Opcode, getCurSDLoc(), Op1.getValueType(), Op1, Op2,
-                            nuw, nsw, exact);
+                            &Flags);
   setValue(&I, Res);
 }
 
Index: lib/CodeGen/SelectionDAG/TargetLowering.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -2650,8 +2650,9 @@
   if (ShAmt) {
     // TODO: For UDIV use SRL instead of SRA.
     SDValue Amt = DAG.getConstant(ShAmt, getShiftAmountTy(Op1.getValueType()));
-    Op1 = DAG.getNode(ISD::SRA, dl, Op1.getValueType(), Op1, Amt, false, false,
-                      true);
+    SDNodeFlags Flags;
+    Flags.setExact(true);
+    Op1 = DAG.getNode(ISD::SRA, dl, Op1.getValueType(), Op1, Amt, &Flags);
     d = d.ashr(ShAmt);
   }
 
Index: lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- lib/Target/X86/X86ISelLowering.cpp
+++ lib/Target/X86/X86ISelLowering.cpp
@@ -12493,9 +12493,8 @@
   case ISD::SUB:
   case ISD::MUL:
   case ISD::SHL: {
-    const BinaryWithFlagsSDNode *BinNode =
-        cast<BinaryWithFlagsSDNode>(Op.getNode());
-    if (BinNode->hasNoSignedWrap())
+    const SDNodeWithFlags *Node = cast<SDNodeWithFlags>(Op.getNode());
+    if (Node->Flags.hasNoSignedWrap())
       break;
   }
   default:
Index: test/CodeGen/PowerPC/fdiv-combine.ll
===================================================================
--- test/CodeGen/PowerPC/fdiv-combine.ll
+++ test/CodeGen/PowerPC/fdiv-combine.ll
@@ -7,6 +7,34 @@
 ; =>
 ; recip = 1.0 / D; a * recip; b * recip; c * recip;
 
+define void @three_fdiv_double_arcp(double %D, double %a, double %b, double %c) {
+; CHECK-LABEL: three_fdiv_double_arcp:
+; CHECK: fdiv
+; CHECK-NOT: fdiv
+; CHECK: fmul
+; CHECK: fmul
+; CHECK: fmul
+  %div = fdiv arcp double %a, %D
+  %div1 = fdiv arcp double %b, %D
+  %div2 = fdiv arcp double %c, %D
+  tail call void @foo_3d(double %div, double %div1, double %div2)
+  ret void
+}
+
+define void @two_fdiv_double_arcp(double %D, double %a, double %b) {
+; CHECK-LABEL: two_fdiv_double_arcp:
+; CHECK: fdiv
+; CHECK: fdiv
+; CHECK-NOT: fmul
+  %div = fdiv arcp double %a, %D
+  %div1 = fdiv arcp double %b, %D
+  tail call void @foo_2d(double %div, double %div1)
+  ret void
+}
+
+; Repeat the tests to check for recognition of a function-level
+; attribute rather than an instruction-level flag.
+
 define void @three_fdiv_double(double %D, double %a, double %b, double %c) #0 {
 ; CHECK-LABEL: three_fdiv_double:
 ; CHECK: fdiv