Index: include/llvm/CodeGen/ISDOpcodes.h =================================================================== --- include/llvm/CodeGen/ISDOpcodes.h +++ include/llvm/CodeGen/ISDOpcodes.h @@ -240,6 +240,10 @@ /// Simple binary floating point operators. FADD, FSUB, FMUL, FDIV, FREM, + /// Simple binary floating point operators with side effects that have token + /// chains as their first operand. + FADD_W_CHAIN, FSUB_W_CHAIN, FMUL_W_CHAIN, FDIV_W_CHAIN, FREM_W_CHAIN, + /// FMA - Perform a * b + c with no intermediate rounding step. FMA, @@ -938,6 +942,15 @@ } // end llvm::ISD namespace +/// Returns true if the opcode is one of the floating-point operations with a chain. +inline bool isFPOpWithChain(unsigned OpCode) { + return OpCode == ISD::FADD_W_CHAIN || + OpCode == ISD::FSUB_W_CHAIN || + OpCode == ISD::FDIV_W_CHAIN || + OpCode == ISD::FREM_W_CHAIN || + OpCode == ISD::FMUL_W_CHAIN; +} + } // end llvm namespace #endif Index: include/llvm/CodeGen/SelectionDAG.h =================================================================== --- include/llvm/CodeGen/SelectionDAG.h +++ include/llvm/CodeGen/SelectionDAG.h @@ -675,7 +675,7 @@ SDValue getNode(unsigned Opcode, SDLoc DL, ArrayRef<EVT> ResultTys, ArrayRef<SDValue> Ops); SDValue getNode(unsigned Opcode, SDLoc DL, SDVTList VTs, - ArrayRef<SDValue> Ops); + ArrayRef<SDValue> Ops, const SDNodeFlags *Flags = nullptr); // Specialize based on number of operands. 
SDValue getNode(unsigned Opcode, SDLoc DL, EVT VT); @@ -684,6 +684,8 @@ const SDNodeFlags *Flags = nullptr); SDValue getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1, SDValue N2, SDValue N3); + SDValue getNode(unsigned Opcode, SDLoc DL, SDVTList VTs, SDValue Chain, + SDValue N1, SDValue N2, const SDNodeFlags *Flags); SDValue getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1, SDValue N2, SDValue N3, SDValue N4); SDValue getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1, SDValue N2, @@ -1223,10 +1225,13 @@ /// Utility function used by legalize and lowering to /// "unroll" a vector operation by splitting out the scalars and operating /// on each element individually. If the ResNE is 0, fully unroll the vector - /// op. If ResNE is less than the width of the vector op, unroll up to ResNE. - /// If the ResNE is greater than the width of the vector op, unroll the - /// vector op and fill the end of the resulting vector with UNDEFS. - SDValue UnrollVectorOp(SDNode *N, unsigned ResNE = 0); + /// op. If ResNE is less than the width of the vector op, unroll up to ResNE. + /// If the ResNE is greater than the width of the vector op, unroll the + /// vector op and fill the end of the resulting vector with UNDEFS. Optional + /// TokenFactor argument can be set for floating-point instructions that have + /// chains. + SDValue UnrollVectorOp(SDNode *N, unsigned ResNE = 0, + SDValue *TokenFactor = nullptr); /// Return true if LD is loading 'Bytes' bytes from a location that is 'Dist' /// units away from the location that the 'Base' load is loading from. @@ -1287,6 +1292,10 @@ SDValue N1, SDValue N2, const SDNodeFlags *Flags = nullptr); + BinarySDNode *GetBinarySDNode(unsigned Opcode, SDLoc DL, SDVTList VTs, + SDValue Chain, SDValue N1, SDValue N2, + const SDNodeFlags *Flags = nullptr); + /// Look up the node specified by ID in CSEMap. If it exists, return it. If /// not, return the insertion token that will make insertion faster. 
This /// overload is for nodes other than Constant or ConstantFP, use the other one Index: include/llvm/CodeGen/SelectionDAGNodes.h =================================================================== --- include/llvm/CodeGen/SelectionDAGNodes.h +++ include/llvm/CodeGen/SelectionDAGNodes.h @@ -114,7 +114,7 @@ SDNode *getNode() const { return Node; } /// set the SDNode - void setNode(SDNode *N) { Node = N; } + inline void setNode(SDNode *N); inline SDNode *operator->() const { return Node; } @@ -931,6 +931,11 @@ // Define inline functions from the SDValue class. +void SDValue::setNode(SDNode *N) { + assert((!N || ResNo < N->getNumValues()) && "Wrong ResNo for new node."); + Node = N; +} + inline SDValue::SDValue(SDNode *node, unsigned resno) : Node(node), ResNo(resno) { assert((!Node || ResNo < Node->getNumValues()) && @@ -1017,13 +1022,18 @@ /// This class is used for two-operand SDNodes. This is solely /// to allow co-allocation of node operands with the node itself. class BinarySDNode : public SDNode { - SDUse Ops[2]; + SDUse Ops[3]; public: BinarySDNode(unsigned Opc, unsigned Order, DebugLoc dl, SDVTList VTs, SDValue X, SDValue Y) : SDNode(Opc, Order, dl, VTs) { InitOperands(Ops, X, Y); } + BinarySDNode(unsigned Opc, unsigned Order, DebugLoc dl, SDVTList VTs, + SDValue Chain, SDValue X, SDValue Y) + : SDNode(Opc, Order, dl, VTs) { + InitOperands(Ops, Chain, X, Y); + } }; /// Returns true if the opcode is a binary operation with flags. 
@@ -1042,6 +1052,13 @@ case ISD::FMUL: case ISD::FREM: case ISD::FSUB: + + case ISD::FADD_W_CHAIN: + case ISD::FSUB_W_CHAIN: + case ISD::FDIV_W_CHAIN: + case ISD::FREM_W_CHAIN: + case ISD::FMUL_W_CHAIN: + return true; default: return false; @@ -1056,6 +1073,10 @@ BinaryWithFlagsSDNode(unsigned Opc, unsigned Order, DebugLoc dl, SDVTList VTs, SDValue X, SDValue Y, const SDNodeFlags &NodeFlags) : BinarySDNode(Opc, Order, dl, VTs, X, Y), Flags(NodeFlags) {} + BinaryWithFlagsSDNode(unsigned Opc, unsigned Order, DebugLoc dl, SDVTList VTs, + SDValue Chain, SDValue X, SDValue Y, + const SDNodeFlags &NodeFlags) + : BinarySDNode(Opc, Order, dl, VTs, Chain, X, Y), Flags(NodeFlags) {} static bool classof(const SDNode *N) { return isBinOpWithFlags(N->getOpcode()); }
Index: include/llvm/IR/IntrinsicsX86.td =================================================================== --- include/llvm/IR/IntrinsicsX86.td +++ include/llvm/IR/IntrinsicsX86.td @@ -137,15 +137,23 @@ def int_x86_sse_add_ss : GCCBuiltin<"__builtin_ia32_addss">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>; + def int_x86_sse_addwchain_ss : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, + llvm_v4f32_ty], [IntrReadMem]>; def int_x86_sse_sub_ss : GCCBuiltin<"__builtin_ia32_subss">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>; + def int_x86_sse_subwchain_ss : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, + llvm_v4f32_ty], [IntrReadMem]>; def int_x86_sse_mul_ss : GCCBuiltin<"__builtin_ia32_mulss">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>; + def int_x86_sse_mulwchain_ss : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, + llvm_v4f32_ty], [IntrReadMem]>; def int_x86_sse_div_ss : GCCBuiltin<"__builtin_ia32_divss">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>; + def int_x86_sse_divwchain_ss : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, + llvm_v4f32_ty], [IntrReadMem]>; def int_x86_sse_sqrt_ss : GCCBuiltin<"__builtin_ia32_sqrtss">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty], [IntrNoMem]>; @@ -286,15 +294,23 @@ def int_x86_sse2_add_sd : GCCBuiltin<"__builtin_ia32_addsd">, Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; + def int_x86_sse2_addwchain_sd : Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, + llvm_v2f64_ty], [IntrReadMem]>; def int_x86_sse2_sub_sd : 
GCCBuiltin<"__builtin_ia32_subsd">, Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; + def int_x86_sse2_subwchain_sd : Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, + llvm_v2f64_ty], [IntrReadMem]>; def int_x86_sse2_mul_sd : GCCBuiltin<"__builtin_ia32_mulsd">, Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; + def int_x86_sse2_mulwchain_sd : Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, + llvm_v2f64_ty], [IntrReadMem]>; def int_x86_sse2_div_sd : GCCBuiltin<"__builtin_ia32_divsd">, Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; + def int_x86_sse2_divwchain_sd : Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, + llvm_v2f64_ty], [IntrReadMem]>; def int_x86_sse2_sqrt_sd : GCCBuiltin<"__builtin_ia32_sqrtsd">, Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty], [IntrNoMem]>; Index: include/llvm/Target/TargetSelectionDAG.td =================================================================== --- include/llvm/Target/TargetSelectionDAG.td +++ include/llvm/Target/TargetSelectionDAG.td @@ -420,6 +420,11 @@ def fmul : SDNode<"ISD::FMUL" , SDTFPBinOp, [SDNPCommutative]>; def fdiv : SDNode<"ISD::FDIV" , SDTFPBinOp>; def frem : SDNode<"ISD::FREM" , SDTFPBinOp>; +def faddwchain : SDNode<"ISD::FADD_W_CHAIN", SDTFPBinOp, [SDNPCommutative, SDNPHasChain, SDNPSideEffect]>; +def fsubwchain : SDNode<"ISD::FSUB_W_CHAIN", SDTFPBinOp, [SDNPHasChain, SDNPSideEffect]>; +def fmulwchain : SDNode<"ISD::FMUL_W_CHAIN", SDTFPBinOp, [SDNPCommutative, SDNPHasChain, SDNPSideEffect]>; +def fdivwchain : SDNode<"ISD::FDIV_W_CHAIN", SDTFPBinOp, [SDNPHasChain, SDNPSideEffect]>; +def fremwchain : SDNode<"ISD::FREM_W_CHAIN", SDTFPBinOp, [SDNPHasChain, SDNPSideEffect]>; def fma : SDNode<"ISD::FMA" , SDTFPTernaryOp>; def fmad : SDNode<"ISD::FMAD" , SDTFPTernaryOp>; def fabs : SDNode<"ISD::FABS" , SDTFPUnaryOp>; Index: lib/CodeGen/SelectionDAG/DAGCombiner.cpp =================================================================== --- 
lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -281,6 +281,7 @@ SDValue visitFMUL(SDNode *N); SDValue visitFMA(SDNode *N); SDValue visitFDIV(SDNode *N); + SDValue visitFDIV_W_CHAIN(SDNode *N); SDValue visitFREM(SDNode *N); SDValue visitFSQRT(SDNode *N); SDValue visitFCOPYSIGN(SDNode *N); @@ -1328,11 +1329,16 @@ // Transfer debug value. DAG.TransferDbgValues(SDValue(N, 0), RV); - if (N->getNumValues() == RV.getNode()->getNumValues()) + if (N->getNumValues() == RV.getNode()->getNumValues()) { DAG.ReplaceAllUsesWith(N, RV.getNode()); - else { - assert(N->getValueType(0) == RV.getValueType() && - N->getNumValues() == 1 && "Type mismatch"); + } else if (N->getNumValues() == 2 && N->getValueType(1) == MVT::Other) { + assert(N->getValueType(0) == RV.getValueType() && "Type mismatch"); + assert(N->getOpcode() == ISD::FDIV_W_CHAIN && "Wrong opcode"); + SDValue OpV[] = { RV, DAG.getEntryNode() }; + DAG.ReplaceAllUsesWith(N, OpV); + } else { + assert(N->getValueType(0) == RV.getValueType() && "Type mismatch"); + assert(N->getNumValues() == 1 && "Type mismatch"); SDValue OpV = RV; DAG.ReplaceAllUsesWith(N, &OpV); } @@ -1411,6 +1417,7 @@ case ISD::FMUL: return visitFMUL(N); case ISD::FMA: return visitFMA(N); case ISD::FDIV: return visitFDIV(N); + case ISD::FDIV_W_CHAIN: return visitFDIV_W_CHAIN(N); case ISD::FREM: return visitFREM(N); case ISD::FSQRT: return visitFSQRT(N); case ISD::FCOPYSIGN: return visitFCOPYSIGN(N); @@ -8676,6 +8683,27 @@ return SDValue(); } +SDValue DAGCombiner::visitFDIV_W_CHAIN(SDNode *N) { + SDValue Chain = N->getOperand(0); + SDValue N0 = N->getOperand(1); + SDValue N1 = N->getOperand(2); + EVT VT = N->getValueType(0); + SDLoc DL(N); + const TargetOptions &Options = DAG.getTarget().Options; + SDNodeFlags *Flags = &cast<BinaryWithFlagsSDNode>(N)->Flags; + + if (Options.UnsafeFPMath) { + // Fold into a reciprocal estimate and multiply instead of a real divide. 
+ if (SDValue RV = BuildReciprocalEstimate(N1, Flags)) { + AddToWorklist(RV.getNode()); + return DAG.getNode(ISD::FMUL_W_CHAIN, DL, DAG.getVTList(VT, MVT::Other), + Chain, N0, RV, Flags); + } + } + + return SDValue(); +} + SDValue DAGCombiner::visitFREM(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); Index: lib/CodeGen/SelectionDAG/LegalizeDAG.cpp =================================================================== --- lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -119,6 +119,12 @@ RTLIB::Libcall Call_F64, RTLIB::Libcall Call_F80, RTLIB::Libcall Call_F128, RTLIB::Libcall Call_PPCF128); + std::pair ExpandChainFPLibCall(SDNode *Node, + RTLIB::Libcall Call_F32, + RTLIB::Libcall Call_F64, + RTLIB::Libcall Call_F80, + RTLIB::Libcall Call_F128, + RTLIB::Libcall Call_PPCF128); SDValue ExpandIntLibCall(SDNode *Node, bool isSigned, RTLIB::Libcall Call_I8, RTLIB::Libcall Call_I16, @@ -1410,6 +1416,16 @@ return; } + // Handle selection of a node with a chain into node without one. 
+ if (Node->getNumValues() == 2 && Res->getNumValues() == 1 && + Node->getValueType(1) == MVT::Other) { + SDValue ResultVals[] = { Res.getValue(0), DAG.getEntryNode() }; + ReplaceNode(Node, &ResultVals[0]); + return; + } + + assert(Node->getNumValues() <= Res->getNumValues() && + "Too few values in new node!"); SmallVector ResultVals; for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i) ResultVals.push_back(Res.getValue(i)); @@ -2291,12 +2307,11 @@ return CallInfo; } -SDValue SelectionDAGLegalize::ExpandFPLibCall(SDNode* Node, - RTLIB::Libcall Call_F32, - RTLIB::Libcall Call_F64, - RTLIB::Libcall Call_F80, - RTLIB::Libcall Call_F128, - RTLIB::Libcall Call_PPCF128) { +static RTLIB::Libcall GetFPLibCall(SDNode *Node, RTLIB::Libcall Call_F32, + RTLIB::Libcall Call_F64, + RTLIB::Libcall Call_F80, + RTLIB::Libcall Call_F128, + RTLIB::Libcall Call_PPCF128) { RTLIB::Libcall LC; switch (Node->getSimpleValueType(0).SimpleTy) { default: llvm_unreachable("Unexpected request for libcall!"); @@ -2306,9 +2321,32 @@ case MVT::f128: LC = Call_F128; break; case MVT::ppcf128: LC = Call_PPCF128; break; } + return LC; +} + +SDValue SelectionDAGLegalize::ExpandFPLibCall(SDNode* Node, + RTLIB::Libcall Call_F32, + RTLIB::Libcall Call_F64, + RTLIB::Libcall Call_F80, + RTLIB::Libcall Call_F128, + RTLIB::Libcall Call_PPCF128) { + RTLIB::Libcall LC = GetFPLibCall(Node, Call_F32, Call_F64, Call_F80, + Call_F128, Call_PPCF128); return ExpandLibCall(LC, Node, false); } +std::pair SelectionDAGLegalize::ExpandChainFPLibCall( + SDNode* Node, + RTLIB::Libcall Call_F32, + RTLIB::Libcall Call_F64, + RTLIB::Libcall Call_F80, + RTLIB::Libcall Call_F128, + RTLIB::Libcall Call_PPCF128) { + RTLIB::Libcall LC = GetFPLibCall(Node, Call_F32, Call_F64, Call_F80, + Call_F128, Call_PPCF128); + return ExpandChainLibCall(LC, Node, false); +} + SDValue SelectionDAGLegalize::ExpandIntLibCall(SDNode* Node, bool isSigned, RTLIB::Libcall Call_I8, RTLIB::Libcall Call_I16, @@ -3946,6 +3984,7 @@ void 
SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) { SmallVector Results; + std::pair CallInfo; SDLoc dl(Node); SDValue Tmp1, Tmp2, Tmp3, Tmp4; unsigned Opc = Node->getOpcode(); @@ -4110,11 +4149,25 @@ RTLIB::DIV_F80, RTLIB::DIV_F128, RTLIB::DIV_PPCF128)); break; + case ISD::FDIV_W_CHAIN: + CallInfo = ExpandChainFPLibCall(Node, RTLIB::DIV_F32, RTLIB::DIV_F64, + RTLIB::DIV_F80, RTLIB::DIV_F128, + RTLIB::DIV_PPCF128); + Results.push_back(CallInfo.first); + Results.push_back(CallInfo.second); + break; case ISD::FREM: Results.push_back(ExpandFPLibCall(Node, RTLIB::REM_F32, RTLIB::REM_F64, RTLIB::REM_F80, RTLIB::REM_F128, RTLIB::REM_PPCF128)); break; + case ISD::FREM_W_CHAIN: + CallInfo = ExpandChainFPLibCall(Node, RTLIB::REM_F32, RTLIB::REM_F64, + RTLIB::REM_F80, RTLIB::REM_F128, + RTLIB::REM_PPCF128); + Results.push_back(CallInfo.first); + Results.push_back(CallInfo.second); + break; case ISD::FMA: Results.push_back(ExpandFPLibCall(Node, RTLIB::FMA_F32, RTLIB::FMA_F64, RTLIB::FMA_F80, RTLIB::FMA_F128, @@ -4125,11 +4178,25 @@ RTLIB::ADD_F80, RTLIB::ADD_F128, RTLIB::ADD_PPCF128)); break; + case ISD::FADD_W_CHAIN: + CallInfo = ExpandChainFPLibCall(Node, RTLIB::ADD_F32, RTLIB::ADD_F64, + RTLIB::ADD_F80, RTLIB::ADD_F128, + RTLIB::ADD_PPCF128); + Results.push_back(CallInfo.first); + Results.push_back(CallInfo.second); + break; case ISD::FMUL: Results.push_back(ExpandFPLibCall(Node, RTLIB::MUL_F32, RTLIB::MUL_F64, RTLIB::MUL_F80, RTLIB::MUL_F128, RTLIB::MUL_PPCF128)); break; + case ISD::FMUL_W_CHAIN: + CallInfo = ExpandChainFPLibCall(Node, RTLIB::MUL_F32, RTLIB::MUL_F64, + RTLIB::MUL_F80, RTLIB::MUL_F128, + RTLIB::MUL_PPCF128); + Results.push_back(CallInfo.first); + Results.push_back(CallInfo.second); + break; case ISD::FP16_TO_FP: if (Node->getValueType(0) == MVT::f32) { Results.push_back(ExpandLibCall(RTLIB::FPEXT_F16_F32, Node, false)); @@ -4147,6 +4214,13 @@ RTLIB::SUB_F80, RTLIB::SUB_F128, RTLIB::SUB_PPCF128)); break; + case ISD::FSUB_W_CHAIN: + CallInfo = 
ExpandChainFPLibCall(Node, RTLIB::SUB_F32, RTLIB::SUB_F64, + RTLIB::SUB_F80, RTLIB::SUB_F128, + RTLIB::SUB_PPCF128); + Results.push_back(CallInfo.first); + Results.push_back(CallInfo.second); + break; case ISD::SREM: Results.push_back(ExpandIntLibCall(Node, true, RTLIB::SREM_I8, @@ -4398,6 +4472,20 @@ Tmp3, DAG.getIntPtrConstant(0, dl))); break; } + case ISD::FADD_W_CHAIN: + case ISD::FSUB_W_CHAIN: + case ISD::FDIV_W_CHAIN: + case ISD::FREM_W_CHAIN: + case ISD::FMUL_W_CHAIN: { + Tmp1 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(1)); + Tmp2 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(2)); + Tmp3 = DAG.getNode(Node->getOpcode(), dl, DAG.getVTList(NVT, MVT::Other), + Node->getOperand(0), Tmp1, Tmp2, Node->getFlags()); + Results.push_back(DAG.getNode(ISD::FP_ROUND, dl, OVT, + Tmp3, DAG.getIntPtrConstant(0, dl))); + Results.push_back(Tmp3.getValue(1)); + break; + } case ISD::FMA: { Tmp1 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(0)); Tmp2 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(1)); Index: lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp =================================================================== --- lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp +++ lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp @@ -77,10 +77,14 @@ case ISD::FMINNUM: R = SoftenFloatRes_FMINNUM(N); break; case ISD::FMAXNUM: R = SoftenFloatRes_FMAXNUM(N); break; case ISD::FADD: R = SoftenFloatRes_FADD(N); break; + case ISD::FADD_W_CHAIN: + R = SoftenFloatRes_FADD_W_CHAIN(N); break; case ISD::FCEIL: R = SoftenFloatRes_FCEIL(N); break; case ISD::FCOPYSIGN: R = SoftenFloatRes_FCOPYSIGN(N, ResNo); break; case ISD::FCOS: R = SoftenFloatRes_FCOS(N); break; case ISD::FDIV: R = SoftenFloatRes_FDIV(N); break; + case ISD::FDIV_W_CHAIN: + R = SoftenFloatRes_FDIV_W_CHAIN(N); break; case ISD::FEXP: R = SoftenFloatRes_FEXP(N); break; case ISD::FEXP2: R = SoftenFloatRes_FEXP2(N); break; case ISD::FFLOOR: R = SoftenFloatRes_FFLOOR(N); break; @@ -89,6 +93,8 @@ 
case ISD::FLOG10: R = SoftenFloatRes_FLOG10(N); break; case ISD::FMA: R = SoftenFloatRes_FMA(N); break; case ISD::FMUL: R = SoftenFloatRes_FMUL(N); break; + case ISD::FMUL_W_CHAIN: + R = SoftenFloatRes_FMUL_W_CHAIN(N); break; case ISD::FNEARBYINT: R = SoftenFloatRes_FNEARBYINT(N); break; case ISD::FNEG: R = SoftenFloatRes_FNEG(N, ResNo); break; case ISD::FP_EXTEND: R = SoftenFloatRes_FP_EXTEND(N); break; @@ -97,11 +103,15 @@ case ISD::FPOW: R = SoftenFloatRes_FPOW(N); break; case ISD::FPOWI: R = SoftenFloatRes_FPOWI(N); break; case ISD::FREM: R = SoftenFloatRes_FREM(N); break; + case ISD::FREM_W_CHAIN: + R = SoftenFloatRes_FREM_W_CHAIN(N); break; case ISD::FRINT: R = SoftenFloatRes_FRINT(N); break; case ISD::FROUND: R = SoftenFloatRes_FROUND(N); break; case ISD::FSIN: R = SoftenFloatRes_FSIN(N); break; case ISD::FSQRT: R = SoftenFloatRes_FSQRT(N); break; case ISD::FSUB: R = SoftenFloatRes_FSUB(N); break; + case ISD::FSUB_W_CHAIN: + R = SoftenFloatRes_FSUB_W_CHAIN(N); break; case ISD::FTRUNC: R = SoftenFloatRes_FTRUNC(N); break; case ISD::LOAD: R = SoftenFloatRes_LOAD(N, ResNo); break; case ISD::SELECT: R = SoftenFloatRes_SELECT(N, ResNo); break; @@ -215,6 +225,25 @@ NVT, Ops, false, SDLoc(N)).first; } +SDValue DAGTypeLegalizer::SoftenFloatRes_FADD_W_CHAIN(SDNode *N) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue Ops[3] = { N->getOperand(0), + GetSoftenedFloat(N->getOperand(1)), + GetSoftenedFloat(N->getOperand(2)) }; + SDValue Val, Ch; + std::tie(Val, Ch) = TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), + RTLIB::ADD_F32, + RTLIB::ADD_F64, + RTLIB::ADD_F80, + RTLIB::ADD_F128, + RTLIB::ADD_PPCF128), + NVT, Ops, false, SDLoc(N)); + // Legalized the chain result - switch anything that used the old chain to + // use the new one. 
+ ReplaceValueWith(SDValue(N, 1), Ch); + return Val; +} + SDValue DAGTypeLegalizer::SoftenFloatRes_FCEIL(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Op = GetSoftenedFloat(N->getOperand(0)); @@ -303,6 +332,25 @@ NVT, Ops, false, SDLoc(N)).first; } +SDValue DAGTypeLegalizer::SoftenFloatRes_FDIV_W_CHAIN(SDNode *N) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue Ops[3] = { N->getOperand(0), + GetSoftenedFloat(N->getOperand(1)), + GetSoftenedFloat(N->getOperand(2)) }; + SDValue Val, Ch; + std::tie(Val, Ch) = TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), + RTLIB::DIV_F32, + RTLIB::DIV_F64, + RTLIB::DIV_F80, + RTLIB::DIV_F128, + RTLIB::DIV_PPCF128), + NVT, Ops, false, SDLoc(N)); + // Legalized the chain result - switch anything that used the old chain to + // use the new one. + ReplaceValueWith(SDValue(N, 1), Ch); + return Val; +} + SDValue DAGTypeLegalizer::SoftenFloatRes_FEXP(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Op = GetSoftenedFloat(N->getOperand(0)); @@ -402,6 +450,25 @@ NVT, Ops, false, SDLoc(N)).first; } +SDValue DAGTypeLegalizer::SoftenFloatRes_FMUL_W_CHAIN(SDNode *N) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue Ops[3] = { N->getOperand(0), + GetSoftenedFloat(N->getOperand(1)), + GetSoftenedFloat(N->getOperand(2)) }; + SDValue Val, Ch; + std::tie(Val, Ch) = TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), + RTLIB::MUL_F32, + RTLIB::MUL_F64, + RTLIB::MUL_F80, + RTLIB::MUL_F128, + RTLIB::MUL_PPCF128), + NVT, Ops, false, SDLoc(N)); + // Legalized the chain result - switch anything that used the old chain to + // use the new one. 
+ ReplaceValueWith(SDValue(N, 1), Ch); + return Val; +} + SDValue DAGTypeLegalizer::SoftenFloatRes_FNEARBYINT(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Op = GetSoftenedFloat(N->getOperand(0)); @@ -531,6 +598,25 @@ NVT, Ops, false, SDLoc(N)).first; } +SDValue DAGTypeLegalizer::SoftenFloatRes_FREM_W_CHAIN(SDNode *N) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue Ops[3] = { N->getOperand(0), + GetSoftenedFloat(N->getOperand(1)), + GetSoftenedFloat(N->getOperand(2)) }; + SDValue Val, Ch; + std::tie(Val, Ch) = TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), + RTLIB::REM_F32, + RTLIB::REM_F64, + RTLIB::REM_F80, + RTLIB::REM_F128, + RTLIB::REM_PPCF128), + NVT, Ops, false, SDLoc(N)); + // Legalized the chain result - switch anything that used the old chain to + // use the new one. + ReplaceValueWith(SDValue(N, 1), Ch); + return Val; +} + SDValue DAGTypeLegalizer::SoftenFloatRes_FRINT(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Op = GetSoftenedFloat(N->getOperand(0)); @@ -592,6 +678,25 @@ NVT, Ops, false, SDLoc(N)).first; } +SDValue DAGTypeLegalizer::SoftenFloatRes_FSUB_W_CHAIN(SDNode *N) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue Ops[3] = { N->getOperand(0), + GetSoftenedFloat(N->getOperand(1)), + GetSoftenedFloat(N->getOperand(2)) }; + SDValue Val, Ch; + std::tie(Val, Ch) = TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), + RTLIB::SUB_F32, + RTLIB::SUB_F64, + RTLIB::SUB_F80, + RTLIB::SUB_F128, + RTLIB::SUB_PPCF128), + NVT, Ops, false, SDLoc(N)); + // Legalized the chain result - switch anything that used the old chain to + // use the new one. 
+ ReplaceValueWith(SDValue(N, 1), Ch); + return Val; +} + SDValue DAGTypeLegalizer::SoftenFloatRes_FTRUNC(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); if (N->getValueType(0) == MVT::f16) @@ -985,11 +1090,14 @@ case ISD::FABS: ExpandFloatRes_FABS(N, Lo, Hi); break; case ISD::FMINNUM: ExpandFloatRes_FMINNUM(N, Lo, Hi); break; case ISD::FMAXNUM: ExpandFloatRes_FMAXNUM(N, Lo, Hi); break; + case ISD::FADD_W_CHAIN: case ISD::FADD: ExpandFloatRes_FADD(N, Lo, Hi); break; case ISD::FCEIL: ExpandFloatRes_FCEIL(N, Lo, Hi); break; case ISD::FCOPYSIGN: ExpandFloatRes_FCOPYSIGN(N, Lo, Hi); break; case ISD::FCOS: ExpandFloatRes_FCOS(N, Lo, Hi); break; case ISD::FDIV: ExpandFloatRes_FDIV(N, Lo, Hi); break; + case ISD::FDIV_W_CHAIN: + ExpandFloatRes_FDIV_W_CHAIN(N, Lo, Hi); break; case ISD::FEXP: ExpandFloatRes_FEXP(N, Lo, Hi); break; case ISD::FEXP2: ExpandFloatRes_FEXP2(N, Lo, Hi); break; case ISD::FFLOOR: ExpandFloatRes_FFLOOR(N, Lo, Hi); break; @@ -998,6 +1106,8 @@ case ISD::FLOG10: ExpandFloatRes_FLOG10(N, Lo, Hi); break; case ISD::FMA: ExpandFloatRes_FMA(N, Lo, Hi); break; case ISD::FMUL: ExpandFloatRes_FMUL(N, Lo, Hi); break; + case ISD::FMUL_W_CHAIN: + ExpandFloatRes_FMUL_W_CHAIN(N, Lo, Hi); break; case ISD::FNEARBYINT: ExpandFloatRes_FNEARBYINT(N, Lo, Hi); break; case ISD::FNEG: ExpandFloatRes_FNEG(N, Lo, Hi); break; case ISD::FP_EXTEND: ExpandFloatRes_FP_EXTEND(N, Lo, Hi); break; @@ -1008,10 +1118,13 @@ case ISD::FSIN: ExpandFloatRes_FSIN(N, Lo, Hi); break; case ISD::FSQRT: ExpandFloatRes_FSQRT(N, Lo, Hi); break; case ISD::FSUB: ExpandFloatRes_FSUB(N, Lo, Hi); break; + case ISD::FSUB_W_CHAIN: + ExpandFloatRes_FSUB_W_CHAIN(N, Lo, Hi); break; case ISD::FTRUNC: ExpandFloatRes_FTRUNC(N, Lo, Hi); break; case ISD::LOAD: ExpandFloatRes_LOAD(N, Lo, Hi); break; case ISD::SINT_TO_FP: case ISD::UINT_TO_FP: ExpandFloatRes_XINT_TO_FP(N, Lo, Hi); break; + case ISD::FREM_W_CHAIN: case ISD::FREM: ExpandFloatRes_FREM(N, Lo, Hi); break; } @@ 
-1125,6 +1238,23 @@ GetPairElements(Call, Lo, Hi); } +void DAGTypeLegalizer::ExpandFloatRes_FDIV_W_CHAIN(SDNode *N, SDValue &Lo, + SDValue &Hi) { + SDValue Ops[3] = { N->getOperand(0), N->getOperand(1), N->getOperand(2) }; + SDValue Val, Ch; + std::tie(Val, Ch) = TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), + RTLIB::DIV_F32, + RTLIB::DIV_F64, + RTLIB::DIV_F80, + RTLIB::DIV_F128, + RTLIB::DIV_PPCF128), + N->getValueType(0), Ops, false, SDLoc(N)); + // Legalized the chain result - switch anything that used the old chain to + // use the new one. + ReplaceValueWith(SDValue(N, 1), Ch); + GetPairElements(Val, Lo, Hi); +} + void DAGTypeLegalizer::ExpandFloatRes_FEXP(SDNode *N, SDValue &Lo, SDValue &Hi) { SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), @@ -1213,6 +1343,23 @@ GetPairElements(Call, Lo, Hi); } +void DAGTypeLegalizer::ExpandFloatRes_FMUL_W_CHAIN(SDNode *N, SDValue &Lo, + SDValue &Hi) { + SDValue Ops[3] = { N->getOperand(0), N->getOperand(1), N->getOperand(2) }; + SDValue Val, Ch; + std::tie(Val, Ch) = TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), + RTLIB::MUL_F32, + RTLIB::MUL_F64, + RTLIB::MUL_F80, + RTLIB::MUL_F128, + RTLIB::MUL_PPCF128), + N->getValueType(0), Ops, false, SDLoc(N)); + // Legalized the chain result - switch anything that used the old chain to + // use the new one. 
+ ReplaceValueWith(SDValue(N, 1), Ch); + GetPairElements(Val, Lo, Hi); +} + void DAGTypeLegalizer::ExpandFloatRes_FNEARBYINT(SDNode *N, SDValue &Lo, SDValue &Hi) { SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), @@ -1328,6 +1475,21 @@ GetPairElements(Call, Lo, Hi); } +void DAGTypeLegalizer::ExpandFloatRes_FSUB_W_CHAIN(SDNode *N, SDValue &Lo, + SDValue &Hi) { + SDValue Ops[3] = { N->getOperand(0), N->getOperand(1), N->getOperand(2) }; + SDValue Val, Ch; + std::tie(Val, Ch) = TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), + RTLIB::SUB_F32, + RTLIB::SUB_F64, + RTLIB::SUB_F80, + RTLIB::SUB_F128, + RTLIB::SUB_PPCF128), + N->getValueType(0), Ops, false, SDLoc(N)); + ReplaceValueWith(SDValue(N, 1), Ch); + GetPairElements(Val, Lo, Hi); +} + void DAGTypeLegalizer::ExpandFloatRes_FTRUNC(SDNode *N, SDValue &Lo, SDValue &Hi) { SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), @@ -1862,6 +2024,13 @@ case ISD::FREM: case ISD::FSUB: R = PromoteFloatRes_BinOp(N); break; + case ISD::FADD_W_CHAIN: + case ISD::FDIV_W_CHAIN: + case ISD::FMUL_W_CHAIN: + case ISD::FREM_W_CHAIN: + case ISD::FSUB_W_CHAIN: + R = PromoteFloatRes_BinOpWithChain(N); break; + case ISD::FMA: // FMA is same as FMAD case ISD::FMAD: R = PromoteFloatRes_FMAD(N); break; @@ -2009,6 +2178,27 @@ return DAG.getNode(N->getOpcode(), SDLoc(N), NVT, Op0, Op1, N->getFlags()); } +// Binary operations with a chain where the result and both operands have +// PromoteFloat type action. Construct a new SDNode with the promoted float +// values of the old operands. 
+SDValue DAGTypeLegalizer::PromoteFloatRes_BinOpWithChain(SDNode *N) { + EVT VT = N->getValueType(0); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); + SDValue Chain = N->getOperand(0); + SDValue Op0 = GetPromotedFloat(N->getOperand(1)); + SDValue Op1 = GetPromotedFloat(N->getOperand(2)); + + SDValue R = DAG.getNode(N->getOpcode(), SDLoc(N), + DAG.getVTList(NVT, MVT::Other), Chain, Op0, Op1, + N->getFlags()); + + // Modified the chain - switch anything that used the old chain to use the + // new one. + ReplaceValueWith(SDValue(N, 1), R.getValue(1)); + + return R.getValue(0); +} + SDValue DAGTypeLegalizer::PromoteFloatRes_FMAD(SDNode *N) { EVT VT = N->getValueType(0); EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); Index: lib/CodeGen/SelectionDAG/LegalizeTypes.h =================================================================== --- lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -426,10 +426,12 @@ SDValue SoftenFloatRes_FMINNUM(SDNode *N); SDValue SoftenFloatRes_FMAXNUM(SDNode *N); SDValue SoftenFloatRes_FADD(SDNode *N); + SDValue SoftenFloatRes_FADD_W_CHAIN(SDNode *N); SDValue SoftenFloatRes_FCEIL(SDNode *N); SDValue SoftenFloatRes_FCOPYSIGN(SDNode *N, unsigned ResNo); SDValue SoftenFloatRes_FCOS(SDNode *N); SDValue SoftenFloatRes_FDIV(SDNode *N); + SDValue SoftenFloatRes_FDIV_W_CHAIN(SDNode *N); SDValue SoftenFloatRes_FEXP(SDNode *N); SDValue SoftenFloatRes_FEXP2(SDNode *N); SDValue SoftenFloatRes_FFLOOR(SDNode *N); @@ -438,6 +440,7 @@ SDValue SoftenFloatRes_FLOG10(SDNode *N); SDValue SoftenFloatRes_FMA(SDNode *N); SDValue SoftenFloatRes_FMUL(SDNode *N); + SDValue SoftenFloatRes_FMUL_W_CHAIN(SDNode *N); SDValue SoftenFloatRes_FNEARBYINT(SDNode *N); SDValue SoftenFloatRes_FNEG(SDNode *N, unsigned ResNo); SDValue SoftenFloatRes_FP_EXTEND(SDNode *N); @@ -446,11 +449,13 @@ SDValue SoftenFloatRes_FPOW(SDNode *N); SDValue SoftenFloatRes_FPOWI(SDNode *N); SDValue SoftenFloatRes_FREM(SDNode *N); + SDValue 
SoftenFloatRes_FREM_W_CHAIN(SDNode *N); SDValue SoftenFloatRes_FRINT(SDNode *N); SDValue SoftenFloatRes_FROUND(SDNode *N); SDValue SoftenFloatRes_FSIN(SDNode *N); SDValue SoftenFloatRes_FSQRT(SDNode *N); SDValue SoftenFloatRes_FSUB(SDNode *N); + SDValue SoftenFloatRes_FSUB_W_CHAIN(SDNode *N); SDValue SoftenFloatRes_FTRUNC(SDNode *N); SDValue SoftenFloatRes_LOAD(SDNode *N, unsigned ResNo); SDValue SoftenFloatRes_SELECT(SDNode *N, unsigned ResNo); @@ -491,37 +496,40 @@ // Float Result Expansion. void ExpandFloatResult(SDNode *N, unsigned ResNo); - void ExpandFloatRes_ConstantFP(SDNode *N, SDValue &Lo, SDValue &Hi); - void ExpandFloatRes_FABS (SDNode *N, SDValue &Lo, SDValue &Hi); - void ExpandFloatRes_FMINNUM (SDNode *N, SDValue &Lo, SDValue &Hi); - void ExpandFloatRes_FMAXNUM (SDNode *N, SDValue &Lo, SDValue &Hi); - void ExpandFloatRes_FADD (SDNode *N, SDValue &Lo, SDValue &Hi); - void ExpandFloatRes_FCEIL (SDNode *N, SDValue &Lo, SDValue &Hi); - void ExpandFloatRes_FCOPYSIGN (SDNode *N, SDValue &Lo, SDValue &Hi); - void ExpandFloatRes_FCOS (SDNode *N, SDValue &Lo, SDValue &Hi); - void ExpandFloatRes_FDIV (SDNode *N, SDValue &Lo, SDValue &Hi); - void ExpandFloatRes_FEXP (SDNode *N, SDValue &Lo, SDValue &Hi); - void ExpandFloatRes_FEXP2 (SDNode *N, SDValue &Lo, SDValue &Hi); - void ExpandFloatRes_FFLOOR (SDNode *N, SDValue &Lo, SDValue &Hi); - void ExpandFloatRes_FLOG (SDNode *N, SDValue &Lo, SDValue &Hi); - void ExpandFloatRes_FLOG2 (SDNode *N, SDValue &Lo, SDValue &Hi); - void ExpandFloatRes_FLOG10 (SDNode *N, SDValue &Lo, SDValue &Hi); - void ExpandFloatRes_FMA (SDNode *N, SDValue &Lo, SDValue &Hi); - void ExpandFloatRes_FMUL (SDNode *N, SDValue &Lo, SDValue &Hi); - void ExpandFloatRes_FNEARBYINT(SDNode *N, SDValue &Lo, SDValue &Hi); - void ExpandFloatRes_FNEG (SDNode *N, SDValue &Lo, SDValue &Hi); - void ExpandFloatRes_FP_EXTEND (SDNode *N, SDValue &Lo, SDValue &Hi); - void ExpandFloatRes_FPOW (SDNode *N, SDValue &Lo, SDValue &Hi); - void ExpandFloatRes_FPOWI 
(SDNode *N, SDValue &Lo, SDValue &Hi); - void ExpandFloatRes_FREM (SDNode *N, SDValue &Lo, SDValue &Hi); - void ExpandFloatRes_FRINT (SDNode *N, SDValue &Lo, SDValue &Hi); - void ExpandFloatRes_FROUND (SDNode *N, SDValue &Lo, SDValue &Hi); - void ExpandFloatRes_FSIN (SDNode *N, SDValue &Lo, SDValue &Hi); - void ExpandFloatRes_FSQRT (SDNode *N, SDValue &Lo, SDValue &Hi); - void ExpandFloatRes_FSUB (SDNode *N, SDValue &Lo, SDValue &Hi); - void ExpandFloatRes_FTRUNC (SDNode *N, SDValue &Lo, SDValue &Hi); - void ExpandFloatRes_LOAD (SDNode *N, SDValue &Lo, SDValue &Hi); - void ExpandFloatRes_XINT_TO_FP(SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_ConstantFP (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FABS (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FMINNUM (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FMAXNUM (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FADD (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FCEIL (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FCOPYSIGN (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FCOS (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FDIV (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FDIV_W_CHAIN(SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FEXP (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FEXP2 (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FFLOOR (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FLOG (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FLOG2 (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FLOG10 (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FMA (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FMUL (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FMUL_W_CHAIN(SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FNEARBYINT (SDNode *N, SDValue &Lo, 
SDValue &Hi); + void ExpandFloatRes_FNEG (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FP_EXTEND (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FPOW (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FPOWI (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FREM (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FRINT (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FROUND (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FSIN (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FSQRT (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FSUB (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FSUB_W_CHAIN(SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FTRUNC (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_LOAD (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_XINT_TO_FP (SDNode *N, SDValue &Lo, SDValue &Hi); // Float Operand Expansion. bool ExpandFloatOperand(SDNode *N, unsigned OperandNo); @@ -553,6 +561,7 @@ void PromoteFloatResult(SDNode *N, unsigned ResNo); SDValue PromoteFloatRes_BITCAST(SDNode *N); SDValue PromoteFloatRes_BinOp(SDNode *N); + SDValue PromoteFloatRes_BinOpWithChain(SDNode *N); SDValue PromoteFloatRes_ConstantFP(SDNode *N); SDValue PromoteFloatRes_EXTRACT_VECTOR_ELT(SDNode *N); SDValue PromoteFloatRes_FCOPYSIGN(SDNode *N); @@ -594,6 +603,7 @@ void ScalarizeVectorResult(SDNode *N, unsigned OpNo); SDValue ScalarizeVecRes_MERGE_VALUES(SDNode *N, unsigned ResNo); SDValue ScalarizeVecRes_BinOp(SDNode *N); + SDValue ScalarizeVecRes_BinOpWithChain(SDNode *N); SDValue ScalarizeVecRes_TernaryOp(SDNode *N); SDValue ScalarizeVecRes_UnaryOp(SDNode *N); SDValue ScalarizeVecRes_InregOp(SDNode *N); @@ -642,6 +652,7 @@ // Vector Result Splitting: <128 x ty> -> 2 x <64 x ty>. 
void SplitVectorResult(SDNode *N, unsigned OpNo); void SplitVecRes_BinOp(SDNode *N, SDValue &Lo, SDValue &Hi); + void SplitVecRes_BinOpWithChain(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_TernaryOp(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_UnaryOp(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_ExtendOp(SDNode *N, SDValue &Lo, SDValue &Hi); Index: lib/CodeGen/SelectionDAG/LegalizeTypes.cpp =================================================================== --- lib/CodeGen/SelectionDAG/LegalizeTypes.cpp +++ lib/CodeGen/SelectionDAG/LegalizeTypes.cpp @@ -1061,23 +1061,32 @@ bool isSigned) { unsigned NumOps = N->getNumOperands(); SDLoc dl(N); + if (NumOps == 0) { return TLI.makeLibCall(DAG, LC, N->getValueType(0), None, isSigned, dl).first; } else if (NumOps == 1) { SDValue Op = N->getOperand(0); - return TLI.makeLibCall(DAG, LC, N->getValueType(0), Op, isSigned, - dl).first; + return TLI.makeLibCall(DAG, LC, N->getValueType(0), Op, isSigned, dl).first; } else if (NumOps == 2) { SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; return TLI.makeLibCall(DAG, LC, N->getValueType(0), Ops, isSigned, dl).first; } - SmallVector Ops(NumOps); - for (unsigned i = 0; i < NumOps; ++i) - Ops[i] = N->getOperand(i); - return TLI.makeLibCall(DAG, LC, N->getValueType(0), Ops, isSigned, dl).first; + bool HasChain = isFPOpWithChain(N->getOpcode()); + + SmallVector Ops(HasChain ? NumOps - 1 : NumOps); + for (unsigned i = HasChain ? 1 : 0; i < NumOps; ++i) + Ops[HasChain ? i - 1 : i] = N->getOperand(i); + + std::pair CallInfo = + TLI.makeLibCall(DAG, LC, N->getValueType(0), Ops, isSigned, dl); + + if (HasChain) + ReplaceValueWith(SDValue(N, 1), CallInfo.second); + + return CallInfo.first; } // ExpandChainLibCall - Expand a node into a call to a libcall. 
Similar to Index: lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp =================================================================== --- lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp +++ lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @@ -151,6 +151,8 @@ if (!HasVectors) return false; + SDValue OldRoot = DAG.getRoot(); + // The legalize process is inherently a bottom-up recursive process (users // legalize their uses before themselves). Given infinite stack space, we // could just start legalizing on the root and traverse the whole graph. In @@ -159,11 +161,15 @@ // node is only legalized after all of its operands are legalized. DAG.AssignTopologicalOrder(); for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(), - E = std::prev(DAG.allnodes_end()); I != std::next(E); ++I) - LegalizeOp(SDValue(&*I, 0)); + E = std::prev(DAG.allnodes_end()); I != std::next(E); ++I) { + // Preserve result number on the root node so that old and new nodes could + // be compared correctly (same node, but different result number will make + // comparison fail). + unsigned ResNo = ((SDNode*)I == OldRoot.getNode() ? OldRoot.getResNo() : 0); + LegalizeOp(SDValue(&*I, ResNo)); + } // Finally, it's possible the root changed. Get the new root. - SDValue OldRoot = DAG.getRoot(); assert(LegalizedNodes.count(OldRoot) && "Root didn't get legalized?"); DAG.setRoot(LegalizedNodes[OldRoot]); @@ -192,10 +198,16 @@ // Legalize the operands SmallVector Ops; - for (const SDValue &Op : Node->op_values()) - Ops.push_back(LegalizeOp(Op)); + for (const SDValue &Op : Node->op_values()) { + if (isFPOpWithChain(Op.getOpcode()) && Op.getValueType() == MVT::Other) + // Do not attempt to legalize chain. 
+ Ops.push_back(Op); + else + Ops.push_back(LegalizeOp(Op)); + } - SDValue Result = SDValue(DAG.UpdateNodeOperands(Op.getNode(), Ops), 0); + SDValue Result = SDValue(DAG.UpdateNodeOperands(Op.getNode(), Ops), + Op.getResNo()); bool HasVectorValue = false; if (Op.getOpcode() == ISD::LOAD) { @@ -332,6 +344,11 @@ case ISD::UMAX: case ISD::UABSDIFF: case ISD::SABSDIFF: + case ISD::FADD_W_CHAIN: + case ISD::FSUB_W_CHAIN: + case ISD::FMUL_W_CHAIN: + case ISD::FDIV_W_CHAIN: + case ISD::FREM_W_CHAIN: QueryType = Node->getValueType(0); break; case ISD::FP_ROUND_INREG: @@ -401,7 +418,9 @@ // 2) Extending a vector of floats to a vector of the same number of larger // floats. For example, AArch64 promotes ISD::FADD on v4f16 to v4f32. MVT VT = Op.getSimpleValueType(); - assert(Op.getNode()->getNumValues() == 1 && + assert(((isFPOpWithChain(Op.getOpcode()) && + Op.getNode()->getNumValues() == 2) || + Op.getNode()->getNumValues() == 1) && "Can't promote a vector with multiple results!"); MVT NVT = TLI.getTypeToPromoteTo(Op.getOpcode(), VT); SDLoc dl(Op); Index: lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp =================================================================== --- lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -111,7 +111,6 @@ case ISD::FMAXNUM: case ISD::FMINNAN: case ISD::FMAXNAN: - case ISD::FPOW: case ISD::FREM: case ISD::FSUB: @@ -128,6 +127,13 @@ case ISD::SRL: R = ScalarizeVecRes_BinOp(N); break; + case ISD::FADD_W_CHAIN: + case ISD::FDIV_W_CHAIN: + case ISD::FMUL_W_CHAIN: + case ISD::FREM_W_CHAIN: + case ISD::FSUB_W_CHAIN: + R = ScalarizeVecRes_BinOpWithChain(N); + break; case ISD::FMA: R = ScalarizeVecRes_TernaryOp(N); break; @@ -145,6 +151,20 @@ LHS.getValueType(), LHS, RHS, N->getFlags()); } +SDValue DAGTypeLegalizer::ScalarizeVecRes_BinOpWithChain(SDNode *N) { + SDValue LHS = GetScalarizedVector(N->getOperand(1)); + SDValue RHS = GetScalarizedVector(N->getOperand(2)); + SDValue Ch = 
DAG.getNode(N->getOpcode(), SDLoc(N), + DAG.getVTList(LHS.getValueType(), MVT::Other), + N->getOperand(0), LHS, RHS, N->getFlags()); + + // Legalized the chain result - switch anything that used the old chain to + // use the new one. + ReplaceValueWith(SDValue(N, 1), Ch.getValue(1)); + + return Ch.getValue(0); +} + SDValue DAGTypeLegalizer::ScalarizeVecRes_TernaryOp(SDNode *N) { SDValue Op0 = GetScalarizedVector(N->getOperand(0)); SDValue Op1 = GetScalarizedVector(N->getOperand(1)); @@ -688,6 +708,13 @@ case ISD::SABSDIFF: SplitVecRes_BinOp(N, Lo, Hi); break; + case ISD::FADD_W_CHAIN: + case ISD::FSUB_W_CHAIN: + case ISD::FMUL_W_CHAIN: + case ISD::FDIV_W_CHAIN: + case ISD::FREM_W_CHAIN: + SplitVecRes_BinOpWithChain(N, Lo, Hi); + break; case ISD::FMA: SplitVecRes_TernaryOp(N, Lo, Hi); break; @@ -712,6 +739,35 @@ Hi = DAG.getNode(Opcode, dl, LHSHi.getValueType(), LHSHi, RHSHi, Flags); } +void DAGTypeLegalizer::SplitVecRes_BinOpWithChain(SDNode *N, SDValue &Lo, + SDValue &Hi) { + SDValue LHSLo, LHSHi; + GetSplitVector(N->getOperand(1), LHSLo, LHSHi); + SDValue RHSLo, RHSHi; + GetSplitVector(N->getOperand(2), RHSLo, RHSHi); + SDLoc dl(N); + + const SDNodeFlags *Flags = N->getFlags(); + SDValue Ch = N->getOperand(0); + + Lo = DAG.getNode(N->getOpcode(), dl, + DAG.getVTList(LHSLo.getValueType(), MVT::Other), + Ch, LHSLo, RHSLo, Flags); + Hi = DAG.getNode(N->getOpcode(), dl, + DAG.getVTList(LHSHi.getValueType(), MVT::Other), + Ch, LHSHi, RHSHi, Flags); + + Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1), + Hi.getValue(1)); + + Lo = Lo.getValue(0); + Hi = Hi.getValue(0); + + // Legalized the chain result - switch anything that used the old chain to + // use the new one. 
+ ReplaceValueWith(SDValue(N, 1), Ch); +} + void DAGTypeLegalizer::SplitVecRes_TernaryOp(SDNode *N, SDValue &Lo, SDValue &Hi) { SDValue Op0Lo, Op0Hi; @@ -1990,11 +2046,16 @@ break; case ISD::FADD: + case ISD::FADD_W_CHAIN: case ISD::FMUL: + case ISD::FMUL_W_CHAIN: case ISD::FPOW: case ISD::FSUB: + case ISD::FSUB_W_CHAIN: case ISD::FDIV: + case ISD::FDIV_W_CHAIN: case ISD::FREM: + case ISD::FREM_W_CHAIN: case ISD::SDIV: case ISD::UDIV: case ISD::SREM: @@ -2096,6 +2157,20 @@ } if (NumElts != 1 && !TLI.canOpTrap(N->getOpcode(), VT)) { + if (isFPOpWithChain(N->getOpcode())) { + SDValue InOp1 = GetWidenedVector(N->getOperand(1)); + SDValue InOp2 = GetWidenedVector(N->getOperand(2)); + SDValue Ch = DAG.getNode(N->getOpcode(), dl, + DAG.getVTList(WidenVT, MVT::Other), + N->getOperand(0), InOp1, InOp2, N->getFlags()); + + // Legalized the chain result - switch anything that used the old chain to + // use the new one. + ReplaceValueWith(SDValue(N, 1), Ch.getValue(1)); + + return Ch.getValue(0); + } + // Operation doesn't trap so just widen as normal. SDValue InOp1 = GetWidenedVector(N->getOperand(0)); SDValue InOp2 = GetWidenedVector(N->getOperand(1)); @@ -2103,8 +2178,13 @@ } // No legal vector version so unroll the vector operation and then widen. - if (NumElts == 1) - return DAG.UnrollVectorOp(N, WidenVT.getVectorNumElements()); + if (NumElts == 1) { + SDValue Chain; + SDValue Ret = DAG.UnrollVectorOp(N, WidenVT.getVectorNumElements(), &Chain); + if (Chain) + AnalyzeNewNode(Chain.getNode()); + return Ret; + } // Since the operation can trap, apply operation on the original vector. 
 EVT MaxVT = VT; Index: lib/CodeGen/SelectionDAG/SelectionDAG.cpp =================================================================== --- lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -937,6 +937,10 @@ SDVTList VTs, SDValue N1, SDValue N2, const SDNodeFlags *Flags) { + assert(!isFPOpWithChain(Opcode) && + "F*_W_CHAIN instructions should be constructed with overload that " + "takes a chain."); + if (isBinOpWithFlags(Opcode)) { // If no flags were passed in, use a default flags object. SDNodeFlags F; @@ -954,6 +958,24 @@ return N; } +BinarySDNode *SelectionDAG::GetBinarySDNode(unsigned Opcode, SDLoc DL, + SDVTList VTs, SDValue Chain, + SDValue N1, SDValue N2, + const SDNodeFlags *Flags) { + assert(isFPOpWithChain(Opcode) && + "Expected floating-point instruction with a chain."); + + // If no flags were passed in, use a default flags object. + SDNodeFlags F; + if (Flags == nullptr) + Flags = &F; + + BinaryWithFlagsSDNode *FN = new (NodeAllocator) BinaryWithFlagsSDNode( + Opcode, DL.getIROrder(), DL.getDebugLoc(), VTs, Chain, N1, N2, *Flags); + + return FN; +} + SDNode *SelectionDAG::FindNodeOrInsertPos(const FoldingSetNodeID &ID, void *&InsertPos) { SDNode *N = CSEMap.FindNodeOrInsertPos(ID, InsertPos); @@ -3397,6 +3419,9 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1, SDValue N2, const SDNodeFlags *Flags) { + assert(!isFPOpWithChain(Opcode) && + "F*_W_CHAIN instructions require a chain, use another overload."); + ConstantSDNode *N1C = dyn_cast(N1); ConstantSDNode *N2C = dyn_cast(N2); ConstantFPSDNode *N1CFP = dyn_cast(N1); @@ -3889,6 +3914,45 @@ return SDValue(N, 0); } +SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, SDVTList VTs, + SDValue Chain, SDValue N1, SDValue N2, + const SDNodeFlags *Flags) { + assert(isFPOpWithChain(Opcode) && + "Expected floating-point instruction with a chain."); + + if (N1.getOpcode() == ISD::UNDEF) { + switch (Opcode) { + case ISD::FSUB_W_CHAIN: + case 
ISD::FDIV_W_CHAIN: + case ISD::FREM_W_CHAIN: + return N1; // fold op(undef, arg2) -> undef + case ISD::FADD_W_CHAIN: + case ISD::FMUL_W_CHAIN: + std::swap(N1, N2); + break; + } + } + + if (getTarget().Options.UnsafeFPMath && N2.getOpcode() == ISD::UNDEF) + return N2; // fold op(arg1, undef) -> undef + + // Memoize this node if possible. + SDValue Ops[] = {Chain, N1, N2}; + FoldingSetNodeID ID; + AddNodeIDNode(ID, Opcode, VTs, Ops); + AddNodeIDFlags(ID, Opcode, Flags); + void *IP = nullptr; + if (SDNode *E = FindNodeOrInsertPos(ID, DL.getDebugLoc(), IP)) + return SDValue(E, 0); + + BinarySDNode *N = GetBinarySDNode(Opcode, DL, VTs, Chain, N1, N2, Flags); + + CSEMap.InsertNode(N, IP); + + InsertNode(N); + return SDValue(N, 0); +} + SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1, SDValue N2, SDValue N3) { // Perform various simplifications. @@ -5489,9 +5553,9 @@ } SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, SDVTList VTList, - ArrayRef Ops) { + ArrayRef Ops, const SDNodeFlags *Flags) { if (VTList.NumVTs == 1) - return getNode(Opcode, DL, VTList.VTs[0], Ops); + return getNode(Opcode, DL, VTList.VTs[0], Ops, Flags); #if 0 switch (Opcode) { @@ -5530,13 +5594,14 @@ N = new (NodeAllocator) UnarySDNode(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTList, Ops[0]); } else if (NumOps == 2) { - N = new (NodeAllocator) BinarySDNode(Opcode, DL.getIROrder(), - DL.getDebugLoc(), VTList, Ops[0], - Ops[1]); + N = GetBinarySDNode(Opcode, DL, VTList, Ops[0], Ops[1], Flags); } else if (NumOps == 3) { - N = new (NodeAllocator) TernarySDNode(Opcode, DL.getIROrder(), - DL.getDebugLoc(), VTList, Ops[0], - Ops[1], Ops[2]); + if (isFPOpWithChain(Opcode)) + N = GetBinarySDNode(Opcode, DL, VTList, Ops[0], Ops[1], Ops[2], Flags); + else + N = new (NodeAllocator) TernarySDNode(Opcode, DL.getIROrder(), + DL.getDebugLoc(), VTList, + Ops[0], Ops[1], Ops[2]); } else { N = new (NodeAllocator) SDNode(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTList, Ops); @@ 
-5547,13 +5612,14 @@ N = new (NodeAllocator) UnarySDNode(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTList, Ops[0]); } else if (NumOps == 2) { - N = new (NodeAllocator) BinarySDNode(Opcode, DL.getIROrder(), - DL.getDebugLoc(), VTList, Ops[0], - Ops[1]); + N = GetBinarySDNode(Opcode, DL, VTList, Ops[0], Ops[1], Flags); } else if (NumOps == 3) { - N = new (NodeAllocator) TernarySDNode(Opcode, DL.getIROrder(), - DL.getDebugLoc(), VTList, Ops[0], - Ops[1], Ops[2]); + if (isFPOpWithChain(Opcode)) + N = GetBinarySDNode(Opcode, DL, VTList, Ops[0], Ops[1], Ops[2], Flags); + else + N = new (NodeAllocator) TernarySDNode(Opcode, DL.getIROrder(), + DL.getDebugLoc(), VTList, Ops[0], + Ops[1], Ops[2]); } else { N = new (NodeAllocator) SDNode(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTList, Ops); @@ -6911,11 +6977,14 @@ return nullptr; } -SDValue SelectionDAG::UnrollVectorOp(SDNode *N, unsigned ResNE) { - assert(N->getNumValues() == 1 && +SDValue SelectionDAG::UnrollVectorOp(SDNode *N, unsigned ResNE, + SDValue *TokenFactor) { + assert(((isFPOpWithChain(N->getOpcode()) && N->getNumValues() == 2) || + N->getNumValues() == 1) && "Can't unroll a vector with multiple results!"); EVT VT = N->getValueType(0); + const SDNodeFlags *Flags = N->getFlags(); unsigned NE = VT.getVectorNumElements(); EVT EltVT = VT.getVectorElementType(); SDLoc dl(N); @@ -6929,6 +6998,8 @@ else if (NE > ResNE) NE = ResNE; + SmallVector ArgChains; + unsigned i; for (i= 0; i != NE; ++i) { for (unsigned j = 0, e = N->getNumOperands(); j != e; ++j) { @@ -6952,6 +7023,17 @@ N->getFlags())); break; } + case ISD::FADD_W_CHAIN: + case ISD::FSUB_W_CHAIN: + case ISD::FMUL_W_CHAIN: + case ISD::FDIV_W_CHAIN: + case ISD::FREM_W_CHAIN: { + SDValue NewNode = getNode(N->getOpcode(), dl, + getVTList(EltVT, MVT::Other), Operands, Flags); + Scalars.push_back(NewNode); + ArgChains.push_back(NewNode.getValue(1)); + break; + } case ISD::VSELECT: Scalars.push_back(getNode(ISD::SELECT, dl, EltVT, Operands)); break; @@ -6974,6 
+7056,22 @@ } } + switch (N->getOpcode()) { + default: break; + case ISD::FADD_W_CHAIN: + case ISD::FSUB_W_CHAIN: + case ISD::FMUL_W_CHAIN: + case ISD::FDIV_W_CHAIN: + case ISD::FREM_W_CHAIN: { + SDValue tmp; + if (!TokenFactor) + TokenFactor = &tmp; + *TokenFactor = getNode(ISD::TokenFactor, dl, MVT::Other, ArgChains); + ReplaceAllUsesOfValueWith(SDValue(N, 1), *TokenFactor); + break; + } + } + for (; i < ResNE; ++i) Scalars.push_back(getUNDEF(EltVT)); Index: lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp =================================================================== --- lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -2315,6 +2315,20 @@ visitBinary(I, ISD::FSUB); } +/// Possibly updates opcode of a floating point operation to one that is +/// protected against instruction reordering, which can otherwise lead to +/// differences in observable side effects at run-time. +static unsigned toFPOpWithChain(unsigned OpCode) { + switch (OpCode) { + default: return OpCode; + case ISD::FADD: return ISD::FADD_W_CHAIN; + case ISD::FSUB: return ISD::FSUB_W_CHAIN; + case ISD::FMUL: return ISD::FMUL_W_CHAIN; + case ISD::FDIV: return ISD::FDIV_W_CHAIN; + case ISD::FREM: return ISD::FREM_W_CHAIN; + } +} + void SelectionDAGBuilder::visitBinary(const User &I, unsigned OpCode) { SDValue Op1 = getValue(I.getOperand(0)); SDValue Op2 = getValue(I.getOperand(1)); @@ -2332,8 +2346,24 @@ if (const PossiblyExactOperator *ExactOp = dyn_cast(&I)) exact = ExactOp->isExact(); - if (const FPMathOperator *FPOp = dyn_cast(&I)) + if (auto *FPOp = dyn_cast(&I)) { FMF = FPOp->getFastMathFlags(); + } else if (isa(&I)) { + // Constant expressions don't store fast-math flags, so fill those related + // to floating-point access from global options. 
+ switch (OpCode) { + case ISD::FADD: + case ISD::FSUB: + case ISD::FMUL: + case ISD::FDIV: + case ISD::FREM: + if (!TM.Options.AllowFPExceptAccess) + FMF.setNoExceptions(); + if (!TM.Options.AllowFPRoundAccess) + FMF.setNoRounding(); + break; + } + } SDNodeFlags Flags; Flags.setExact(exact); @@ -2348,9 +2378,32 @@ Flags.setNoExceptions(FMF.noExceptions()); Flags.setNoRounding(FMF.noRounding()); } - SDValue BinNodeValue = DAG.getNode(OpCode, getCurSDLoc(), Op1.getValueType(), - Op1, Op2, &Flags); - setValue(&I, BinNodeValue); + + if (!FMF.noExceptions() || !FMF.noRounding() || + TM.Options.AllowFPExceptAccess || TM.Options.AllowFPRoundAccess) + OpCode = toFPOpWithChain(OpCode); + + SDValue BinNodeValue; + switch (OpCode) { + default: + BinNodeValue = DAG.getNode(OpCode, getCurSDLoc(), Op1.getValueType(), + Op1, Op2, &Flags); + setValue(&I, BinNodeValue); + break; + case ISD::FADD_W_CHAIN: + case ISD::FSUB_W_CHAIN: + case ISD::FMUL_W_CHAIN: + case ISD::FDIV_W_CHAIN: + case ISD::FREM_W_CHAIN: + BinNodeValue = DAG.getNode(OpCode, getCurSDLoc(), + DAG.getVTList(Op1.getValueType(), MVT::Other), + getRoot(), Op1, Op2, &Flags); + setValue(&I, BinNodeValue.getValue(0)); + + if (BinNodeValue->getNumValues() == 2) + DAG.setRoot(BinNodeValue.getValue(1)); + break; + } } void SelectionDAGBuilder::visitShift(const User &I, unsigned Opcode) { Index: lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp =================================================================== --- lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp +++ lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp @@ -194,12 +194,17 @@ case ISD::ROTL: return "rotl"; case ISD::ROTR: return "rotr"; case ISD::FADD: return "fadd"; + case ISD::FADD_W_CHAIN: return "fadd_w_chain"; case ISD::FSUB: return "fsub"; + case ISD::FSUB_W_CHAIN: return "fsub_w_chain"; case ISD::FMUL: return "fmul"; + case ISD::FMUL_W_CHAIN: return "fmul_w_chain"; case ISD::FDIV: return "fdiv"; + case ISD::FDIV_W_CHAIN: return "fdiv_w_chain"; case ISD::FMA: 
return "fma"; case ISD::FMAD: return "fmad"; case ISD::FREM: return "frem"; + case ISD::FREM_W_CHAIN: return "frem_w_chain"; case ISD::FCOPYSIGN: return "fcopysign"; case ISD::FGETSIGN: return "fgetsign"; case ISD::FPOW: return "fpow"; Index: lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp =================================================================== --- lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -3160,8 +3160,12 @@ } // If this has chain/glue inputs, add them. - if (EmitNodeInfo & OPFL_Chain) - Ops.push_back(InputChain); + if (EmitNodeInfo & OPFL_Chain) { + if (InputChain.getNode()) + Ops.push_back(InputChain); + else + Ops.push_back(CurDAG->getEntryNode()); + } if ((EmitNodeInfo & OPFL_GlueInput) && InputGlue.getNode() != nullptr) Ops.push_back(InputGlue); Index: lib/CodeGen/SelectionDAG/TargetLowering.cpp =================================================================== --- lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -89,15 +89,19 @@ bool isSigned, SDLoc dl, bool doesNotReturn, bool isReturnValueUsed) const { + bool HasChain = (!Ops.empty() && Ops[0].getValueType() == MVT::Other); + TargetLowering::ArgListTy Args; Args.reserve(Ops.size()); TargetLowering::ArgListEntry Entry; - for (SDValue Op : Ops) { - Entry.Node = Op; + for (unsigned i = HasChain ? 
1 : 0; i != Ops.size(); ++i) { + Entry.Node = Ops[i]; Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext()); - Entry.isSExt = shouldSignExtendTypeInLibCall(Op.getValueType(), isSigned); - Entry.isZExt = !shouldSignExtendTypeInLibCall(Op.getValueType(), isSigned); + Entry.isSExt = shouldSignExtendTypeInLibCall(Ops[i].getValueType(), + isSigned); + Entry.isZExt = !shouldSignExtendTypeInLibCall(Ops[i].getValueType(), + isSigned); Args.push_back(Entry); } @@ -111,7 +115,8 @@ Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext()); TargetLowering::CallLoweringInfo CLI(DAG); bool signExtend = shouldSignExtendTypeInLibCall(RetVT, isSigned); - CLI.setDebugLoc(dl).setChain(DAG.getEntryNode()) + CLI.setDebugLoc(dl) + .setChain(HasChain ? Ops[0] : DAG.getEntryNode()) .setCallee(getLibcallCallingConv(LC), RetTy, Callee, std::move(Args), 0) .setNoReturn(doesNotReturn).setDiscardResult(!isReturnValueUsed) .setSExtResult(signExtend).setZExtResult(!signExtend); Index: lib/Target/AArch64/AArch64ISelLowering.h =================================================================== --- lib/Target/AArch64/AArch64ISelLowering.h +++ lib/Target/AArch64/AArch64ISelLowering.h @@ -483,6 +483,8 @@ SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) const; SDValue LowerF128Call(SDValue Op, SelectionDAG &DAG, RTLIB::Libcall Call) const; + SDValue LowerF128CallWithChain(SDValue Op, SelectionDAG &DAG, + RTLIB::Libcall Call) const; SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const; Index: lib/Target/AArch64/AArch64ISelLowering.cpp =================================================================== --- lib/Target/AArch64/AArch64ISelLowering.cpp +++ lib/Target/AArch64/AArch64ISelLowering.cpp @@ -138,6 +138,9 @@ setOperationAction(ISD::FREM, MVT::f32, Expand); setOperationAction(ISD::FREM, MVT::f64, Expand); setOperationAction(ISD::FREM, MVT::f80, Expand); + 
setOperationAction(ISD::FREM_W_CHAIN, MVT::f32, Expand); + setOperationAction(ISD::FREM_W_CHAIN, MVT::f64, Expand); + setOperationAction(ISD::FREM_W_CHAIN, MVT::f80, Expand); // Custom lowering hooks are needed for XOR // to fold it into CSINC/CSINV. @@ -148,19 +151,24 @@ // there's a valid register class, so we need custom operations in most cases. setOperationAction(ISD::FABS, MVT::f128, Expand); setOperationAction(ISD::FADD, MVT::f128, Custom); + setOperationAction(ISD::FADD_W_CHAIN, MVT::f128, Custom); setOperationAction(ISD::FCOPYSIGN, MVT::f128, Expand); setOperationAction(ISD::FCOS, MVT::f128, Expand); setOperationAction(ISD::FDIV, MVT::f128, Custom); + setOperationAction(ISD::FDIV_W_CHAIN, MVT::f128, Custom); setOperationAction(ISD::FMA, MVT::f128, Expand); setOperationAction(ISD::FMUL, MVT::f128, Custom); + setOperationAction(ISD::FMUL_W_CHAIN, MVT::f128, Custom); setOperationAction(ISD::FNEG, MVT::f128, Expand); setOperationAction(ISD::FPOW, MVT::f128, Expand); setOperationAction(ISD::FREM, MVT::f128, Expand); + setOperationAction(ISD::FREM_W_CHAIN, MVT::f128, Expand); setOperationAction(ISD::FRINT, MVT::f128, Expand); setOperationAction(ISD::FSIN, MVT::f128, Expand); setOperationAction(ISD::FSINCOS, MVT::f128, Expand); setOperationAction(ISD::FSQRT, MVT::f128, Expand); setOperationAction(ISD::FSUB, MVT::f128, Custom); + setOperationAction(ISD::FSUB_W_CHAIN, MVT::f128, Custom); setOperationAction(ISD::FTRUNC, MVT::f128, Expand); setOperationAction(ISD::SETCC, MVT::f128, Custom); setOperationAction(ISD::BR_CC, MVT::f128, Custom); @@ -272,53 +280,66 @@ setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom); // f16 is a storage-only type, always promote it to f32. 
- setOperationAction(ISD::SETCC, MVT::f16, Promote); - setOperationAction(ISD::BR_CC, MVT::f16, Promote); - setOperationAction(ISD::SELECT_CC, MVT::f16, Promote); - setOperationAction(ISD::SELECT, MVT::f16, Promote); - setOperationAction(ISD::FADD, MVT::f16, Promote); - setOperationAction(ISD::FSUB, MVT::f16, Promote); - setOperationAction(ISD::FMUL, MVT::f16, Promote); - setOperationAction(ISD::FDIV, MVT::f16, Promote); - setOperationAction(ISD::FREM, MVT::f16, Promote); - setOperationAction(ISD::FMA, MVT::f16, Promote); - setOperationAction(ISD::FNEG, MVT::f16, Promote); - setOperationAction(ISD::FABS, MVT::f16, Promote); - setOperationAction(ISD::FCEIL, MVT::f16, Promote); - setOperationAction(ISD::FCOPYSIGN, MVT::f16, Promote); - setOperationAction(ISD::FCOS, MVT::f16, Promote); - setOperationAction(ISD::FFLOOR, MVT::f16, Promote); - setOperationAction(ISD::FNEARBYINT, MVT::f16, Promote); - setOperationAction(ISD::FPOW, MVT::f16, Promote); - setOperationAction(ISD::FPOWI, MVT::f16, Promote); - setOperationAction(ISD::FRINT, MVT::f16, Promote); - setOperationAction(ISD::FSIN, MVT::f16, Promote); - setOperationAction(ISD::FSINCOS, MVT::f16, Promote); - setOperationAction(ISD::FSQRT, MVT::f16, Promote); - setOperationAction(ISD::FEXP, MVT::f16, Promote); - setOperationAction(ISD::FEXP2, MVT::f16, Promote); - setOperationAction(ISD::FLOG, MVT::f16, Promote); - setOperationAction(ISD::FLOG2, MVT::f16, Promote); - setOperationAction(ISD::FLOG10, MVT::f16, Promote); - setOperationAction(ISD::FROUND, MVT::f16, Promote); - setOperationAction(ISD::FTRUNC, MVT::f16, Promote); - setOperationAction(ISD::FMINNUM, MVT::f16, Promote); - setOperationAction(ISD::FMAXNUM, MVT::f16, Promote); - setOperationAction(ISD::FMINNAN, MVT::f16, Promote); - setOperationAction(ISD::FMAXNAN, MVT::f16, Promote); + setOperationAction(ISD::SETCC, MVT::f16, Promote); + setOperationAction(ISD::BR_CC, MVT::f16, Promote); + setOperationAction(ISD::SELECT_CC, MVT::f16, Promote); + 
setOperationAction(ISD::SELECT, MVT::f16, Promote); + setOperationAction(ISD::FADD, MVT::f16, Promote); + setOperationAction(ISD::FADD_W_CHAIN, MVT::f16, Promote); + setOperationAction(ISD::FSUB, MVT::f16, Promote); + setOperationAction(ISD::FSUB_W_CHAIN, MVT::f16, Promote); + setOperationAction(ISD::FMUL, MVT::f16, Promote); + setOperationAction(ISD::FMUL_W_CHAIN, MVT::f16, Promote); + setOperationAction(ISD::FDIV, MVT::f16, Promote); + setOperationAction(ISD::FDIV_W_CHAIN, MVT::f16, Promote); + setOperationAction(ISD::FREM, MVT::f16, Promote); + setOperationAction(ISD::FREM_W_CHAIN, MVT::f16, Promote); + setOperationAction(ISD::FMA, MVT::f16, Promote); + setOperationAction(ISD::FNEG, MVT::f16, Promote); + setOperationAction(ISD::FABS, MVT::f16, Promote); + setOperationAction(ISD::FCEIL, MVT::f16, Promote); + setOperationAction(ISD::FCOPYSIGN, MVT::f16, Promote); + setOperationAction(ISD::FCOS, MVT::f16, Promote); + setOperationAction(ISD::FFLOOR, MVT::f16, Promote); + setOperationAction(ISD::FNEARBYINT, MVT::f16, Promote); + setOperationAction(ISD::FPOW, MVT::f16, Promote); + setOperationAction(ISD::FPOWI, MVT::f16, Promote); + setOperationAction(ISD::FRINT, MVT::f16, Promote); + setOperationAction(ISD::FSIN, MVT::f16, Promote); + setOperationAction(ISD::FSINCOS, MVT::f16, Promote); + setOperationAction(ISD::FSQRT, MVT::f16, Promote); + setOperationAction(ISD::FEXP, MVT::f16, Promote); + setOperationAction(ISD::FEXP2, MVT::f16, Promote); + setOperationAction(ISD::FLOG, MVT::f16, Promote); + setOperationAction(ISD::FLOG2, MVT::f16, Promote); + setOperationAction(ISD::FLOG10, MVT::f16, Promote); + setOperationAction(ISD::FROUND, MVT::f16, Promote); + setOperationAction(ISD::FTRUNC, MVT::f16, Promote); + setOperationAction(ISD::FMINNUM, MVT::f16, Promote); + setOperationAction(ISD::FMAXNUM, MVT::f16, Promote); + setOperationAction(ISD::FMINNAN, MVT::f16, Promote); + setOperationAction(ISD::FMAXNAN, MVT::f16, Promote); // v4f16 is also a storage-only type, so promote 
it to v4f32 when that is // known to be safe. setOperationAction(ISD::FADD, MVT::v4f16, Promote); + setOperationAction(ISD::FADD_W_CHAIN, MVT::v4f16, Promote); setOperationAction(ISD::FSUB, MVT::v4f16, Promote); + setOperationAction(ISD::FSUB_W_CHAIN, MVT::v4f16, Promote); setOperationAction(ISD::FMUL, MVT::v4f16, Promote); + setOperationAction(ISD::FMUL_W_CHAIN, MVT::v4f16, Promote); setOperationAction(ISD::FDIV, MVT::v4f16, Promote); + setOperationAction(ISD::FDIV_W_CHAIN, MVT::v4f16, Promote); setOperationAction(ISD::FP_EXTEND, MVT::v4f16, Promote); setOperationAction(ISD::FP_ROUND, MVT::v4f16, Promote); AddPromotedToType(ISD::FADD, MVT::v4f16, MVT::v4f32); + AddPromotedToType(ISD::FADD_W_CHAIN, MVT::v4f16, MVT::v4f32); AddPromotedToType(ISD::FSUB, MVT::v4f16, MVT::v4f32); + AddPromotedToType(ISD::FSUB_W_CHAIN, MVT::v4f16, MVT::v4f32); AddPromotedToType(ISD::FMUL, MVT::v4f16, MVT::v4f32); + AddPromotedToType(ISD::FMUL_W_CHAIN, MVT::v4f16, MVT::v4f32); AddPromotedToType(ISD::FDIV, MVT::v4f16, MVT::v4f32); + AddPromotedToType(ISD::FDIV_W_CHAIN, MVT::v4f16, MVT::v4f32); AddPromotedToType(ISD::FP_EXTEND, MVT::v4f16, MVT::v4f32); AddPromotedToType(ISD::FP_ROUND, MVT::v4f16, MVT::v4f32); @@ -336,6 +357,7 @@ setOperationAction(ISD::FPOW, MVT::v4f16, Expand); setOperationAction(ISD::FPOWI, MVT::v4f16, Expand); setOperationAction(ISD::FREM, MVT::v4f16, Expand); + setOperationAction(ISD::FREM_W_CHAIN, MVT::v4f16, Expand); setOperationAction(ISD::FROUND, MVT::v4f16, Expand); setOperationAction(ISD::FRINT, MVT::v4f16, Expand); setOperationAction(ISD::FSIN, MVT::v4f16, Expand); @@ -356,24 +378,29 @@ // v8f16 is also a storage-only type, so expand it. 
setOperationAction(ISD::FABS, MVT::v8f16, Expand); setOperationAction(ISD::FADD, MVT::v8f16, Expand); + setOperationAction(ISD::FADD_W_CHAIN, MVT::v8f16, Expand); setOperationAction(ISD::FCEIL, MVT::v8f16, Expand); setOperationAction(ISD::FCOPYSIGN, MVT::v8f16, Expand); setOperationAction(ISD::FCOS, MVT::v8f16, Expand); setOperationAction(ISD::FDIV, MVT::v8f16, Expand); + setOperationAction(ISD::FDIV_W_CHAIN, MVT::v8f16, Expand); setOperationAction(ISD::FFLOOR, MVT::v8f16, Expand); setOperationAction(ISD::FMA, MVT::v8f16, Expand); setOperationAction(ISD::FMUL, MVT::v8f16, Expand); + setOperationAction(ISD::FMUL_W_CHAIN, MVT::v8f16, Expand); setOperationAction(ISD::FNEARBYINT, MVT::v8f16, Expand); setOperationAction(ISD::FNEG, MVT::v8f16, Expand); setOperationAction(ISD::FPOW, MVT::v8f16, Expand); setOperationAction(ISD::FPOWI, MVT::v8f16, Expand); setOperationAction(ISD::FREM, MVT::v8f16, Expand); + setOperationAction(ISD::FREM_W_CHAIN, MVT::v8f16, Expand); setOperationAction(ISD::FROUND, MVT::v8f16, Expand); setOperationAction(ISD::FRINT, MVT::v8f16, Expand); setOperationAction(ISD::FSIN, MVT::v8f16, Expand); setOperationAction(ISD::FSINCOS, MVT::v8f16, Expand); setOperationAction(ISD::FSQRT, MVT::v8f16, Expand); setOperationAction(ISD::FSUB, MVT::v8f16, Expand); + setOperationAction(ISD::FSUB_W_CHAIN, MVT::v8f16, Expand); setOperationAction(ISD::FTRUNC, MVT::v8f16, Expand); setOperationAction(ISD::SETCC, MVT::v8f16, Expand); setOperationAction(ISD::BR_CC, MVT::v8f16, Expand); @@ -529,23 +556,28 @@ // silliness like this: setOperationAction(ISD::FABS, MVT::v1f64, Expand); setOperationAction(ISD::FADD, MVT::v1f64, Expand); + setOperationAction(ISD::FADD_W_CHAIN, MVT::v1f64, Expand); setOperationAction(ISD::FCEIL, MVT::v1f64, Expand); setOperationAction(ISD::FCOPYSIGN, MVT::v1f64, Expand); setOperationAction(ISD::FCOS, MVT::v1f64, Expand); setOperationAction(ISD::FDIV, MVT::v1f64, Expand); + setOperationAction(ISD::FDIV_W_CHAIN, MVT::v1f64, Expand); 
setOperationAction(ISD::FFLOOR, MVT::v1f64, Expand); setOperationAction(ISD::FMA, MVT::v1f64, Expand); setOperationAction(ISD::FMUL, MVT::v1f64, Expand); + setOperationAction(ISD::FMUL_W_CHAIN, MVT::v1f64, Expand); setOperationAction(ISD::FNEARBYINT, MVT::v1f64, Expand); setOperationAction(ISD::FNEG, MVT::v1f64, Expand); setOperationAction(ISD::FPOW, MVT::v1f64, Expand); setOperationAction(ISD::FREM, MVT::v1f64, Expand); + setOperationAction(ISD::FREM_W_CHAIN, MVT::v1f64, Expand); setOperationAction(ISD::FROUND, MVT::v1f64, Expand); setOperationAction(ISD::FRINT, MVT::v1f64, Expand); setOperationAction(ISD::FSIN, MVT::v1f64, Expand); setOperationAction(ISD::FSINCOS, MVT::v1f64, Expand); setOperationAction(ISD::FSQRT, MVT::v1f64, Expand); setOperationAction(ISD::FSUB, MVT::v1f64, Expand); + setOperationAction(ISD::FSUB_W_CHAIN, MVT::v1f64, Expand); setOperationAction(ISD::FTRUNC, MVT::v1f64, Expand); setOperationAction(ISD::SETCC, MVT::v1f64, Expand); setOperationAction(ISD::BR_CC, MVT::v1f64, Expand); @@ -687,6 +719,7 @@ setOperationAction(ISD::UREM, VT.getSimpleVT(), Expand); setOperationAction(ISD::SREM, VT.getSimpleVT(), Expand); setOperationAction(ISD::FREM, VT.getSimpleVT(), Expand); + setOperationAction(ISD::FREM_W_CHAIN, VT.getSimpleVT(), Expand); setOperationAction(ISD::FP_TO_SINT, VT.getSimpleVT(), Custom); setOperationAction(ISD::FP_TO_UINT, VT.getSimpleVT(), Custom); @@ -1664,6 +1697,13 @@ return makeLibCall(DAG, Call, MVT::f128, Ops, false, SDLoc(Op)).first; } +SDValue AArch64TargetLowering::LowerF128CallWithChain(SDValue Op, + SelectionDAG &DAG, + RTLIB::Libcall Call) const { + SmallVector<SDValue, 2> Ops(Op->op_begin() + 1, Op->op_end()); + return makeLibCall(DAG, Call, MVT::f128, Ops, false, SDLoc(Op)).first; +} + static SDValue LowerXOR(SDValue Op, SelectionDAG &DAG) { SDValue Sel = Op.getOperand(0); SDValue Other = Op.getOperand(1); @@ -2276,6 +2316,14 @@ return LowerF128Call(Op, DAG, RTLIB::MUL_F128); case ISD::FDIV: return LowerF128Call(Op, DAG,
RTLIB::DIV_F128); + case ISD::FADD_W_CHAIN: + return LowerF128CallWithChain(Op, DAG, RTLIB::ADD_F128); + case ISD::FSUB_W_CHAIN: + return LowerF128CallWithChain(Op, DAG, RTLIB::SUB_F128); + case ISD::FMUL_W_CHAIN: + return LowerF128CallWithChain(Op, DAG, RTLIB::MUL_F128); + case ISD::FDIV_W_CHAIN: + return LowerF128CallWithChain(Op, DAG, RTLIB::DIV_F128); case ISD::FP_ROUND: return LowerFP_ROUND(Op, DAG); case ISD::FP_EXTEND: Index: lib/Target/AArch64/AArch64InstrInfo.td =================================================================== --- lib/Target/AArch64/AArch64InstrInfo.td +++ lib/Target/AArch64/AArch64InstrInfo.td @@ -2528,8 +2528,12 @@ //===----------------------------------------------------------------------===// defm FADD : TwoOperandFPData<0b0010, "fadd", fadd>; +let isCodeGenOnly = 1, hasSideEffects = 1 in +defm FADD_FPE : TwoOperandFPData<0b0010, "fadd", faddwchain>; let SchedRW = [WriteFDiv] in { defm FDIV : TwoOperandFPData<0b0001, "fdiv", fdiv>; +let isCodeGenOnly = 1, hasSideEffects = 1 in +defm FDIV_FPE : TwoOperandFPData<0b0001, "fdiv", fdivwchain>; } defm FMAXNM : TwoOperandFPData<0b0110, "fmaxnm", fmaxnum>; defm FMAX : TwoOperandFPData<0b0100, "fmax", fmaxnan>; @@ -2537,9 +2541,13 @@ defm FMIN : TwoOperandFPData<0b0101, "fmin", fminnan>; let SchedRW = [WriteFMul] in { defm FMUL : TwoOperandFPData<0b0000, "fmul", fmul>; +let isCodeGenOnly = 1, hasSideEffects = 1 in +defm FMUL_FPE : TwoOperandFPData<0b0000, "fmul", fmulwchain>; defm FNMUL : TwoOperandFPDataNeg<0b1000, "fnmul", fmul>; } defm FSUB : TwoOperandFPData<0b0011, "fsub", fsub>; +let isCodeGenOnly = 1, hasSideEffects = 1 in +defm FSUB_FPE : TwoOperandFPData<0b0011, "fsub", fsubwchain>; def : Pat<(v1f64 (fmaxnan (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))), (FMAXDrr FPR64:$Rn, FPR64:$Rm)>; @@ -2861,11 +2869,18 @@ defm FACGE : SIMDThreeSameVectorFPCmp<1,0,0b11101,"facge",int_aarch64_neon_facge>; defm FACGT : SIMDThreeSameVectorFPCmp<1,1,0b11101,"facgt",int_aarch64_neon_facgt>; defm FADDP : 
SIMDThreeSameVectorFP<1,0,0b11010,"faddp",int_aarch64_neon_addp>; +let isCodeGenOnly = 1, hasSideEffects = 1 in +defm FADDP_FPE : SIMDThreeSameVectorFP<1,0,0b11010,"faddp", + int_aarch64_neon_addp>; defm FADD : SIMDThreeSameVectorFP<0,0,0b11010,"fadd", fadd>; +let isCodeGenOnly = 1, hasSideEffects = 1 in +defm FADD_FPE : SIMDThreeSameVectorFP<0,0,0b11010,"fadd", faddwchain>; defm FCMEQ : SIMDThreeSameVectorFPCmp<0, 0, 0b11100, "fcmeq", AArch64fcmeq>; defm FCMGE : SIMDThreeSameVectorFPCmp<1, 0, 0b11100, "fcmge", AArch64fcmge>; defm FCMGT : SIMDThreeSameVectorFPCmp<1, 1, 0b11100, "fcmgt", AArch64fcmgt>; defm FDIV : SIMDThreeSameVectorFP<1,0,0b11111,"fdiv", fdiv>; +let isCodeGenOnly = 1, hasSideEffects = 1 in +defm FDIV_FPE : SIMDThreeSameVectorFP<1,0,0b11111,"fdiv", fdivwchain>; defm FMAXNMP : SIMDThreeSameVectorFP<1,0,0b11000,"fmaxnmp", int_aarch64_neon_fmaxnmp>; defm FMAXNM : SIMDThreeSameVectorFP<0,0,0b11000,"fmaxnm", fmaxnum>; defm FMAXP : SIMDThreeSameVectorFP<1,0,0b11110,"fmaxp", int_aarch64_neon_fmaxp>; @@ -2895,9 +2910,13 @@ defm FMULX : SIMDThreeSameVectorFP<0,0,0b11011,"fmulx", int_aarch64_neon_fmulx>; defm FMUL : SIMDThreeSameVectorFP<1,0,0b11011,"fmul", fmul>; +let isCodeGenOnly = 1, hasSideEffects = 1 in +defm FMUL_FPE : SIMDThreeSameVectorFP<1,0,0b11011,"fmul", fmulwchain>; defm FRECPS : SIMDThreeSameVectorFP<0,0,0b11111,"frecps", int_aarch64_neon_frecps>; defm FRSQRTS : SIMDThreeSameVectorFP<0,1,0b11111,"frsqrts", int_aarch64_neon_frsqrts>; defm FSUB : SIMDThreeSameVectorFP<0,1,0b11010,"fsub", fsub>; +let isCodeGenOnly = 1, hasSideEffects = 1 in +defm FSUB_FPE : SIMDThreeSameVectorFP<0,1,0b11010,"fsub", fsubwchain>; defm MLA : SIMDThreeSameVectorBHSTied<0, 0b10010, "mla", TriOpFrag<(add node:$LHS, (mul node:$MHS, node:$RHS))> >; defm MLS : SIMDThreeSameVectorBHSTied<1, 0b10010, "mls", @@ -3621,6 +3640,8 @@ defm ADDP : SIMDPairwiseScalarD<0, 0b11011, "addp">; defm FADDP : SIMDFPPairwiseScalar<1, 0, 0b01101, "faddp">; +let isCodeGenOnly = 1, 
hasSideEffects = 1 in +defm FADDP_FPE : SIMDFPPairwiseScalar<1, 0, 0b01101, "faddp">; defm FMAXNMP : SIMDFPPairwiseScalar<1, 0, 0b01100, "fmaxnmp">; defm FMAXP : SIMDFPPairwiseScalar<1, 0, 0b01111, "fmaxp">; defm FMINNMP : SIMDFPPairwiseScalar<1, 1, 0b01100, "fminnmp">; @@ -3635,6 +3656,13 @@ (FADDPv2i32p (EXTRACT_SUBREG (FADDPv4f32 V128:$Rn, V128:$Rn), dsub))>; def : Pat<(f64 (int_aarch64_neon_faddv (v2f64 V128:$Rn))), (FADDPv2i64p V128:$Rn)>; +def : Pat<(f32 (int_aarch64_neon_faddv (v2f32 V64:$Rn))), + (FADDP_FPEv2i32p V64:$Rn)>; +def : Pat<(f32 (int_aarch64_neon_faddv (v4f32 V128:$Rn))), + (FADDP_FPEv2i32p (EXTRACT_SUBREG + (FADDP_FPEv4f32 V128:$Rn, V128:$Rn), dsub))>; +def : Pat<(f64 (int_aarch64_neon_faddv (v2f64 V128:$Rn))), + (FADDP_FPEv2i64p V128:$Rn)>; def : Pat<(f32 (int_aarch64_neon_fmaxnmv (v2f32 V64:$Rn))), (FMAXNMPv2i32p V64:$Rn)>; def : Pat<(f64 (int_aarch64_neon_fmaxnmv (v2f64 V128:$Rn))), @@ -5879,12 +5907,18 @@ def : Pat<(f64 (fadd (vector_extract (v2f64 FPR128:$Rn), (i64 0)), (vector_extract (v2f64 FPR128:$Rn), (i64 1)))), (f64 (FADDPv2i64p (v2f64 FPR128:$Rn)))>; +def : Pat<(f64 (faddwchain (vector_extract (v2f64 FPR128:$Rn), (i64 0)), + (vector_extract (v2f64 FPR128:$Rn), (i64 1)))), + (f64 (FADDP_FPEv2i64p (v2f64 FPR128:$Rn)))>; // vector_extract on 64-bit vectors gets promoted to a 128 bit vector, // so we match on v4f32 here, not v2f32. This will also catch adding // the low two lanes of a true v4f32 vector. def : Pat<(fadd (vector_extract (v4f32 FPR128:$Rn), (i64 0)), (vector_extract (v4f32 FPR128:$Rn), (i64 1))), (f32 (FADDPv2i32p (EXTRACT_SUBREG FPR128:$Rn, dsub)))>; +def : Pat<(faddwchain (vector_extract (v4f32 FPR128:$Rn), (i64 0)), + (vector_extract (v4f32 FPR128:$Rn), (i64 1))), + (f32 (FADDP_FPEv2i32p (EXTRACT_SUBREG FPR128:$Rn, dsub)))>; // Scalar 64-bit shifts in FPR64 registers. 
def : Pat<(i64 (int_aarch64_neon_sshl (i64 FPR64:$Rn), (i64 FPR64:$Rm))), Index: lib/Target/ARM/ARMISelLowering.cpp =================================================================== --- lib/Target/ARM/ARMISelLowering.cpp +++ lib/Target/ARM/ARMISelLowering.cpp @@ -136,12 +136,14 @@ } // Neon does not support vector divide/remainder operations. - setOperationAction(ISD::SDIV, VT, Expand); - setOperationAction(ISD::UDIV, VT, Expand); - setOperationAction(ISD::FDIV, VT, Expand); - setOperationAction(ISD::SREM, VT, Expand); - setOperationAction(ISD::UREM, VT, Expand); - setOperationAction(ISD::FREM, VT, Expand); + setOperationAction(ISD::SDIV, VT, Expand); + setOperationAction(ISD::UDIV, VT, Expand); + setOperationAction(ISD::FDIV, VT, Expand); + setOperationAction(ISD::FDIV_W_CHAIN, VT, Expand); + setOperationAction(ISD::SREM, VT, Expand); + setOperationAction(ISD::UREM, VT, Expand); + setOperationAction(ISD::FREM, VT, Expand); + setOperationAction(ISD::FREM_W_CHAIN, VT, Expand); if (VT.isInteger()) { setOperationAction(ISD::SABSDIFF, VT, Legal); @@ -487,12 +489,17 @@ // The same with v4f32. But keep in mind that vadd, vsub, vmul are natively // supported for v4f32. setOperationAction(ISD::FADD, MVT::v2f64, Expand); + setOperationAction(ISD::FADD_W_CHAIN, MVT::v2f64, Expand); setOperationAction(ISD::FSUB, MVT::v2f64, Expand); + setOperationAction(ISD::FSUB_W_CHAIN, MVT::v2f64, Expand); + setOperationAction(ISD::FMUL_W_CHAIN, MVT::v2f64, Expand); setOperationAction(ISD::FMUL, MVT::v2f64, Expand); // FIXME: Code duplication: FDIV and FREM are expanded always, see // ARMTargetLowering::addTypeForNEON method for details. setOperationAction(ISD::FDIV, MVT::v2f64, Expand); + setOperationAction(ISD::FDIV_W_CHAIN, MVT::v2f64, Expand); setOperationAction(ISD::FREM, MVT::v2f64, Expand); + setOperationAction(ISD::FREM_W_CHAIN, MVT::v2f64, Expand); // FIXME: Create unittest. // In another words, find a way when "copysign" appears in DAG with vector // operands. 
@@ -652,39 +659,44 @@ // operations, f64 is legal for the few double-precision instructions which // are present However, no double-precision operations other than moves, // loads and stores are provided by the hardware. - setOperationAction(ISD::FADD, MVT::f64, Expand); - setOperationAction(ISD::FSUB, MVT::f64, Expand); - setOperationAction(ISD::FMUL, MVT::f64, Expand); - setOperationAction(ISD::FMA, MVT::f64, Expand); - setOperationAction(ISD::FDIV, MVT::f64, Expand); - setOperationAction(ISD::FREM, MVT::f64, Expand); - setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand); - setOperationAction(ISD::FGETSIGN, MVT::f64, Expand); - setOperationAction(ISD::FNEG, MVT::f64, Expand); - setOperationAction(ISD::FABS, MVT::f64, Expand); - setOperationAction(ISD::FSQRT, MVT::f64, Expand); - setOperationAction(ISD::FSIN, MVT::f64, Expand); - setOperationAction(ISD::FCOS, MVT::f64, Expand); - setOperationAction(ISD::FPOWI, MVT::f64, Expand); - setOperationAction(ISD::FPOW, MVT::f64, Expand); - setOperationAction(ISD::FLOG, MVT::f64, Expand); - setOperationAction(ISD::FLOG2, MVT::f64, Expand); - setOperationAction(ISD::FLOG10, MVT::f64, Expand); - setOperationAction(ISD::FEXP, MVT::f64, Expand); - setOperationAction(ISD::FEXP2, MVT::f64, Expand); - setOperationAction(ISD::FCEIL, MVT::f64, Expand); - setOperationAction(ISD::FTRUNC, MVT::f64, Expand); - setOperationAction(ISD::FRINT, MVT::f64, Expand); - setOperationAction(ISD::FNEARBYINT, MVT::f64, Expand); - setOperationAction(ISD::FFLOOR, MVT::f64, Expand); - setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom); - setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom); - setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom); - setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom); - setOperationAction(ISD::FP_TO_SINT, MVT::f64, Custom); - setOperationAction(ISD::FP_TO_UINT, MVT::f64, Custom); - setOperationAction(ISD::FP_ROUND, MVT::f32, Custom); - setOperationAction(ISD::FP_EXTEND, MVT::f64, Custom); + 
setOperationAction(ISD::FADD, MVT::f64, Expand); + setOperationAction(ISD::FADD_W_CHAIN, MVT::f64, Expand); + setOperationAction(ISD::FSUB, MVT::f64, Expand); + setOperationAction(ISD::FSUB_W_CHAIN, MVT::f64, Expand); + setOperationAction(ISD::FMUL, MVT::f64, Expand); + setOperationAction(ISD::FMUL_W_CHAIN, MVT::f64, Expand); + setOperationAction(ISD::FMA, MVT::f64, Expand); + setOperationAction(ISD::FDIV, MVT::f64, Expand); + setOperationAction(ISD::FDIV_W_CHAIN, MVT::f64, Expand); + setOperationAction(ISD::FREM, MVT::f64, Expand); + setOperationAction(ISD::FREM_W_CHAIN, MVT::f64, Expand); + setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand); + setOperationAction(ISD::FGETSIGN, MVT::f64, Expand); + setOperationAction(ISD::FNEG, MVT::f64, Expand); + setOperationAction(ISD::FABS, MVT::f64, Expand); + setOperationAction(ISD::FSQRT, MVT::f64, Expand); + setOperationAction(ISD::FSIN, MVT::f64, Expand); + setOperationAction(ISD::FCOS, MVT::f64, Expand); + setOperationAction(ISD::FPOWI, MVT::f64, Expand); + setOperationAction(ISD::FPOW, MVT::f64, Expand); + setOperationAction(ISD::FLOG, MVT::f64, Expand); + setOperationAction(ISD::FLOG2, MVT::f64, Expand); + setOperationAction(ISD::FLOG10, MVT::f64, Expand); + setOperationAction(ISD::FEXP, MVT::f64, Expand); + setOperationAction(ISD::FEXP2, MVT::f64, Expand); + setOperationAction(ISD::FCEIL, MVT::f64, Expand); + setOperationAction(ISD::FTRUNC, MVT::f64, Expand); + setOperationAction(ISD::FRINT, MVT::f64, Expand); + setOperationAction(ISD::FNEARBYINT, MVT::f64, Expand); + setOperationAction(ISD::FFLOOR, MVT::f64, Expand); + setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom); + setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom); + setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom); + setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom); + setOperationAction(ISD::FP_TO_SINT, MVT::f64, Custom); + setOperationAction(ISD::FP_TO_UINT, MVT::f64, Custom); + setOperationAction(ISD::FP_ROUND, MVT::f32, Custom); + 
setOperationAction(ISD::FP_EXTEND, MVT::f64, Custom); } computeRegisterProperties(Subtarget->getRegisterInfo()); @@ -921,14 +933,16 @@ setOperationAction(ISD::BR_JT, MVT::Other, Custom); // We don't support sin/cos/fmod/copysign/pow - setOperationAction(ISD::FSIN, MVT::f64, Expand); - setOperationAction(ISD::FSIN, MVT::f32, Expand); - setOperationAction(ISD::FCOS, MVT::f32, Expand); - setOperationAction(ISD::FCOS, MVT::f64, Expand); - setOperationAction(ISD::FSINCOS, MVT::f64, Expand); - setOperationAction(ISD::FSINCOS, MVT::f32, Expand); - setOperationAction(ISD::FREM, MVT::f64, Expand); - setOperationAction(ISD::FREM, MVT::f32, Expand); + setOperationAction(ISD::FSIN, MVT::f64, Expand); + setOperationAction(ISD::FSIN, MVT::f32, Expand); + setOperationAction(ISD::FCOS, MVT::f32, Expand); + setOperationAction(ISD::FCOS, MVT::f64, Expand); + setOperationAction(ISD::FSINCOS, MVT::f64, Expand); + setOperationAction(ISD::FSINCOS, MVT::f32, Expand); + setOperationAction(ISD::FREM, MVT::f64, Expand); + setOperationAction(ISD::FREM, MVT::f32, Expand); + setOperationAction(ISD::FREM_W_CHAIN, MVT::f64, Expand); + setOperationAction(ISD::FREM_W_CHAIN, MVT::f32, Expand); if (!Subtarget->useSoftFloat() && Subtarget->hasVFP2() && !Subtarget->isThumb1Only()) { setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom); Index: lib/Target/ARM/ARMInstrNEON.td =================================================================== --- lib/Target/ARM/ARMInstrNEON.td +++ lib/Target/ARM/ARMInstrNEON.td @@ -4105,6 +4105,12 @@ v2f32, v2f32, fadd, 1>; def VADDfq : N3VQ<0, 0, 0b00, 0b1101, 0, IIC_VBINQ, "vadd", "f32", v4f32, v4f32, fadd, 1>; +let isCodeGenOnly = 1 in { +def VADDfd_FPE : N3VD<0, 0, 0b00, 0b1101, 0, IIC_VBIND, "vadd", "f32", + v2f32, v2f32, faddwchain, 1>; +def VADDfq_FPE : N3VQ<0, 0, 0b00, 0b1101, 0, IIC_VBINQ, "vadd", "f32", + v4f32, v4f32, faddwchain, 1>; +} // VADDL : Vector Add Long (Q = D + D) defm VADDLs : N3VLExt_QHS<0,1,0b0000,0, IIC_VSHLiD, IIC_VSHLiD, "vaddl", "s", add, 
sext, 1>; @@ -4160,6 +4166,12 @@ v2f32, v2f32, fmul, 1>; def VMULfq : N3VQ<1, 0, 0b00, 0b1101, 1, IIC_VFMULQ, "vmul", "f32", v4f32, v4f32, fmul, 1>; +let isCodeGenOnly = 1 in { +def VMULfd_FPE : N3VD<1, 0, 0b00, 0b1101, 1, IIC_VFMULD, "vmul", + "f32", v2f32, v2f32, fmulwchain, 1>; +def VMULfq_FPE : N3VQ<1, 0, 0b00, 0b1101, 1, IIC_VFMULQ, "vmul", + "f32", v4f32, v4f32, fmulwchain, 1>; +} defm VMULsl : N3VSL_HS<0b1000, "vmul", mul>; def VMULslfd : N3VDSL<0b10, 0b1001, IIC_VBIND, "vmul", "f32", v2f32, fmul>; def VMULslfq : N3VQSL<0b10, 0b1001, IIC_VBINQ, "vmul", "f32", v4f32, @@ -4597,6 +4609,12 @@ v2f32, v2f32, fsub, 0>; def VSUBfq : N3VQ<0, 0, 0b10, 0b1101, 0, IIC_VBINQ, "vsub", "f32", v4f32, v4f32, fsub, 0>; +let isCodeGenOnly = 1 in { +def VSUBfd_FPE : N3VD<0, 0, 0b10, 0b1101, 0, IIC_VBIND, "vsub", "f32", + v2f32, v2f32, fsubwchain, 0>; +def VSUBfq_FPE : N3VQ<0, 0, 0b10, 0b1101, 0, IIC_VBINQ, "vsub", "f32", + v4f32, v4f32, fsubwchain, 0>; +} // VSUBL : Vector Subtract Long (Q = D - D) defm VSUBLs : N3VLExt_QHS<0,1,0b0010,0, IIC_VSHLiD, IIC_VSHLiD, "vsubl", "s", sub, sext, 0>; @@ -6349,8 +6367,11 @@ ssub_0))>; def : N3VSPat<fadd, VADDfd>; +def : N3VSPat<faddwchain, VADDfd_FPE>; def : N3VSPat<fsub, VSUBfd>; +def : N3VSPat<fsubwchain, VSUBfd_FPE>; def : N3VSPat<fmul, VMULfd>; +def : N3VSPat<fmulwchain, VMULfd_FPE>; def : N3VSMulOpPat<fmul, fadd, VMLAfd>, Requires<[HasNEON, UseNEONForFP, UseFPVMLx, DontUseFusedMAC]>; def : N3VSMulOpPat<fmul, fsub, VMLSfd>, Index: lib/Target/ARM/ARMInstrVFP.td =================================================================== --- lib/Target/ARM/ARMInstrVFP.td +++ lib/Target/ARM/ARMInstrVFP.td @@ -280,79 +280,113 @@ // let TwoOperandAliasConstraint = "$Dn = $Dd" in -def VADDD : ADbI<0b11100, 0b11, 0, 0, - (outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm), - IIC_fpALU64, "vadd", ".f64\t$Dd, $Dn, $Dm", - [(set DPR:$Dd, (fadd DPR:$Dn, (f64 DPR:$Dm)))]>; +class VADDD<SDPatternOperator OpCode> : ADbI<0b11100, 0b11, 0, 0, + (outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm), + IIC_fpALU64, "vadd", ".f64\t$Dd, $Dn, $Dm", + [(set DPR:$Dd, + (OpCode DPR:$Dn, (f64 DPR:$Dm)))]>; let TwoOperandAliasConstraint = "$Sn = $Sd" in -def VADDS :
ASbIn<0b11100, 0b11, 0, 0, - (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm), - IIC_fpALU32, "vadd", ".f32\t$Sd, $Sn, $Sm", - [(set SPR:$Sd, (fadd SPR:$Sn, SPR:$Sm))]> { +class VADDS<SDPatternOperator OpCode> : ASbIn<0b11100, 0b11, 0, 0, + (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm), + IIC_fpALU32, "vadd", ".f32\t$Sd, $Sn, $Sm", + [(set SPR:$Sd, + (OpCode SPR:$Sn, SPR:$Sm))]> { // Some single precision VFP instructions may be executed on both NEON and // VFP pipelines on A8. let D = VFPNeonA8Domain; } let TwoOperandAliasConstraint = "$Dn = $Dd" in -def VSUBD : ADbI<0b11100, 0b11, 1, 0, - (outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm), - IIC_fpALU64, "vsub", ".f64\t$Dd, $Dn, $Dm", - [(set DPR:$Dd, (fsub DPR:$Dn, (f64 DPR:$Dm)))]>; +class VSUBD<SDPatternOperator OpCode> : ADbI<0b11100, 0b11, 1, 0, + (outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm), + IIC_fpALU64, "vsub", ".f64\t$Dd, $Dn, $Dm", + [(set DPR:$Dd, + (OpCode DPR:$Dn, (f64 DPR:$Dm)))]>; let TwoOperandAliasConstraint = "$Sn = $Sd" in -def VSUBS : ASbIn<0b11100, 0b11, 1, 0, - (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm), - IIC_fpALU32, "vsub", ".f32\t$Sd, $Sn, $Sm", - [(set SPR:$Sd, (fsub SPR:$Sn, SPR:$Sm))]> { +class VSUBS<SDPatternOperator OpCode> : ASbIn<0b11100, 0b11, 1, 0, + (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm), + IIC_fpALU32, "vsub", ".f32\t$Sd, $Sn, $Sm", + [(set SPR:$Sd, + (OpCode SPR:$Sn, SPR:$Sm))]> { // Some single precision VFP instructions may be executed on both NEON and // VFP pipelines on A8.
let D = VFPNeonA8Domain; } let TwoOperandAliasConstraint = "$Dn = $Dd" in -def VDIVD : ADbI<0b11101, 0b00, 0, 0, - (outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm), - IIC_fpDIV64, "vdiv", ".f64\t$Dd, $Dn, $Dm", - [(set DPR:$Dd, (fdiv DPR:$Dn, (f64 DPR:$Dm)))]>; +class VDIVD<SDPatternOperator OpCode> : ADbI<0b11101, 0b00, 0, 0, + (outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm), + IIC_fpDIV64, "vdiv", ".f64\t$Dd, $Dn, $Dm", + [(set DPR:$Dd, + (OpCode DPR:$Dn, (f64 DPR:$Dm)))]>; let TwoOperandAliasConstraint = "$Sn = $Sd" in -def VDIVS : ASbI<0b11101, 0b00, 0, 0, - (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm), - IIC_fpDIV32, "vdiv", ".f32\t$Sd, $Sn, $Sm", - [(set SPR:$Sd, (fdiv SPR:$Sn, SPR:$Sm))]>; +class VDIVS<SDPatternOperator OpCode> : ASbI<0b11101, 0b00, 0, 0, + (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm), + IIC_fpDIV32, "vdiv", ".f32\t$Sd, $Sn, $Sm", + [(set SPR:$Sd, (OpCode SPR:$Sn, SPR:$Sm))]>; let TwoOperandAliasConstraint = "$Dn = $Dd" in -def VMULD : ADbI<0b11100, 0b10, 0, 0, - (outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm), - IIC_fpMUL64, "vmul", ".f64\t$Dd, $Dn, $Dm", - [(set DPR:$Dd, (fmul DPR:$Dn, (f64 DPR:$Dm)))]>; +class VMULD<SDPatternOperator OpCode> : ADbI<0b11100, 0b10, 0, 0, + (outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm), + IIC_fpMUL64, "vmul", ".f64\t$Dd, $Dn, $Dm", + [(set DPR:$Dd, + (OpCode DPR:$Dn, (f64 DPR:$Dm)))]>; let TwoOperandAliasConstraint = "$Sn = $Sd" in -def VMULS : ASbIn<0b11100, 0b10, 0, 0, - (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm), - IIC_fpMUL32, "vmul", ".f32\t$Sd, $Sn, $Sm", - [(set SPR:$Sd, (fmul SPR:$Sn, SPR:$Sm))]> { +class VMULS<SDPatternOperator OpCode> : ASbIn<0b11100, 0b10, 0, 0, + (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm), + IIC_fpMUL32, "vmul", ".f32\t$Sd, $Sn, $Sm", + [(set SPR:$Sd, + (OpCode SPR:$Sn, SPR:$Sm))]> { // Some single precision VFP instructions may be executed on both NEON and // VFP pipelines on A8.
let D = VFPNeonA8Domain; } -def VNMULD : ADbI<0b11100, 0b10, 1, 0, - (outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm), - IIC_fpMUL64, "vnmul", ".f64\t$Dd, $Dn, $Dm", - [(set DPR:$Dd, (fneg (fmul DPR:$Dn, (f64 DPR:$Dm))))]>; +class VNMULD<SDPatternOperator OpCode> : ADbI<0b11100, 0b10, 1, 0, + (outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm), + IIC_fpMUL64, "vnmul", ".f64\t$Dd, $Dn, $Dm", + [(set DPR:$Dd, + (fneg (OpCode DPR:$Dn, + (f64 DPR:$Dm))))]>; -def VNMULS : ASbI<0b11100, 0b10, 1, 0, - (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm), - IIC_fpMUL32, "vnmul", ".f32\t$Sd, $Sn, $Sm", - [(set SPR:$Sd, (fneg (fmul SPR:$Sn, SPR:$Sm)))]> { +class VNMULS<SDPatternOperator OpCode> : ASbI<0b11100, 0b10, 1, 0, + (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm), + IIC_fpMUL32, "vnmul", ".f32\t$Sd, $Sn, $Sm", + [(set SPR:$Sd, + (fneg (OpCode SPR:$Sn, SPR:$Sm)))]> { // Some single precision VFP instructions may be executed on both NEON and // VFP pipelines on A8. let D = VFPNeonA8Domain; } +def VADDD : VADDD<fadd>; +def VADDS : VADDS<fadd>; +def VSUBD : VSUBD<fsub>; +def VSUBS : VSUBS<fsub>; +def VDIVD : VDIVD<fdiv>; +def VDIVS : VDIVS<fdiv>; +def VMULD : VMULD<fmul>; +def VMULS : VMULS<fmul>; +def VNMULD : VNMULD<fmul>; +def VNMULS : VNMULS<fmul>; + +let isCodeGenOnly = 1 in { +def VADDD_FPE : VADDD<faddwchain>; +def VADDS_FPE : VADDS<faddwchain>; +def VSUBD_FPE : VSUBD<fsubwchain>; +def VSUBS_FPE : VSUBS<fsubwchain>; +def VDIVD_FPE : VDIVD<fdivwchain>; +def VDIVS_FPE : VDIVS<fdivwchain>; +def VMULD_FPE : VMULD<fmulwchain>; +def VMULS_FPE : VMULS<fmulwchain>; +def VNMULD_FPE : VNMULD<fmulwchain>; +def VNMULS_FPE : VNMULS<fmulwchain>; +} + multiclass vsel_inst<string op, bits<2> opc, int CC> { let DecoderNamespace = "VFPV8", PostEncoderMethod = "", Uses = [CPSR], AddedComplexity = 4 in { @@ -1837,12 +1871,20 @@ def : VFP2InstAlias<"fmstat${p}", (FMSTAT pred:$p)>; def : VFP2InstAlias<"fadds${p} $Sd, $Sn, $Sm", (VADDS SPR:$Sd, SPR:$Sn, SPR:$Sm, pred:$p)>; +def : VFP2InstAlias<"fadds${p} $Sd, $Sn, $Sm", + (VADDS_FPE SPR:$Sd, SPR:$Sn, SPR:$Sm, pred:$p)>; def : VFP2DPInstAlias<"faddd${p} $Dd, $Dn, $Dm", (VADDD DPR:$Dd, DPR:$Dn, DPR:$Dm, pred:$p)>; +def : VFP2DPInstAlias<"faddd${p} $Dd, $Dn, $Dm", + (VADDD_FPE DPR:$Dd, DPR:$Dn, DPR:$Dm, pred:$p)>; def :
VFP2InstAlias<"fsubs${p} $Sd, $Sn, $Sm", (VSUBS SPR:$Sd, SPR:$Sn, SPR:$Sm, pred:$p)>; +def : VFP2InstAlias<"fsubs${p} $Sd, $Sn, $Sm", + (VSUBS_FPE SPR:$Sd, SPR:$Sn, SPR:$Sm, pred:$p)>; def : VFP2DPInstAlias<"fsubd${p} $Dd, $Dn, $Dm", (VSUBD DPR:$Dd, DPR:$Dn, DPR:$Dm, pred:$p)>; +def : VFP2DPInstAlias<"fsubd${p} $Dd, $Dn, $Dm", + (VSUBD_FPE DPR:$Dd, DPR:$Dn, DPR:$Dm, pred:$p)>; // No need for the size suffix on VSQRT. It's implied by the register classes. def : VFP2InstAlias<"vsqrt${p} $Sd, $Sm", (VSQRTS SPR:$Sd, SPR:$Sm, pred:$p)>; Index: lib/Target/Hexagon/HexagonISelLowering.cpp =================================================================== --- lib/Target/Hexagon/HexagonISelLowering.cpp +++ lib/Target/Hexagon/HexagonISelLowering.cpp @@ -1699,7 +1699,7 @@ for (unsigned FPExpOp : {ISD::FDIV, ISD::FREM, ISD::FSQRT, ISD::FSIN, ISD::FCOS, ISD::FSINCOS, - ISD::FPOW, ISD::FCOPYSIGN}) { + ISD::FPOW, ISD::FCOPYSIGN, ISD::FDIV_W_CHAIN, ISD::FREM_W_CHAIN}) { setOperationAction(FPExpOp, MVT::f32, Expand); setOperationAction(FPExpOp, MVT::f64, Expand); } @@ -1762,6 +1762,9 @@ ISD::FLOG10, ISD::FEXP, ISD::FEXP2, ISD::FCEIL, ISD::FTRUNC, ISD::FRINT, ISD::FNEARBYINT, ISD::FROUND, ISD::FFLOOR, ISD::FMINNUM, ISD::FMAXNUM, ISD::FSINCOS, + + ISD::FADD_W_CHAIN, ISD::FSUB_W_CHAIN, ISD::FMUL_W_CHAIN, + ISD::FDIV_W_CHAIN, ISD::FREM_W_CHAIN, // Misc: ISD::SELECT, ISD::ConstantPool, // Vector: @@ -1827,10 +1830,13 @@ // Subtarget-specific operation actions. 
// if (Subtarget.hasV5TOps()) { - setOperationAction(ISD::FMA, MVT::f64, Expand); - setOperationAction(ISD::FADD, MVT::f64, Expand); - setOperationAction(ISD::FSUB, MVT::f64, Expand); - setOperationAction(ISD::FMUL, MVT::f64, Expand); + setOperationAction(ISD::FMA, MVT::f64, Expand); + setOperationAction(ISD::FADD, MVT::f64, Expand); + setOperationAction(ISD::FADD_W_CHAIN, MVT::f64, Expand); + setOperationAction(ISD::FSUB, MVT::f64, Expand); + setOperationAction(ISD::FSUB_W_CHAIN, MVT::f64, Expand); + setOperationAction(ISD::FMUL, MVT::f64, Expand); + setOperationAction(ISD::FMUL_W_CHAIN, MVT::f64, Expand); setOperationAction(ISD::FP_TO_UINT, MVT::i1, Promote); setOperationAction(ISD::FP_TO_UINT, MVT::i8, Promote); @@ -1863,7 +1869,8 @@ // Expand these operations for both f32 and f64: for (unsigned FPExpOpV4 : - {ISD::FADD, ISD::FSUB, ISD::FMUL, ISD::FABS, ISD::FNEG, ISD::FMA}) { + {ISD::FADD, ISD::FSUB, ISD::FMUL, ISD::FABS, ISD::FNEG, ISD::FMA, + ISD::FADD_W_CHAIN, ISD::FSUB_W_CHAIN, ISD::FMUL_W_CHAIN}) { setOperationAction(FPExpOpV4, MVT::f32, Expand); setOperationAction(FPExpOpV4, MVT::f64, Expand); } Index: lib/Target/Hexagon/HexagonInstrInfoV5.td =================================================================== --- lib/Target/Hexagon/HexagonInstrInfoV5.td +++ lib/Target/Hexagon/HexagonInstrInfoV5.td @@ -172,9 +172,16 @@ let isCommutable = 1 in { def F2_sfadd : T_MInstFloat < "sfadd", 0b000, 0b000>; def F2_sfmpy : T_MInstFloat < "sfmpy", 0b010, 0b000>; + + let hasSideEffects = 1, isCodeGenOnly = 1 in { + def F2_sfaddwchain : T_MInstFloat < "sfadd", 0b000, 0b000>; + def F2_sfmpywchain : T_MInstFloat < "sfmpy", 0b010, 0b000>; + } } def F2_sfsub : T_MInstFloat < "sfsub", 0b000, 0b001>; +let hasSideEffects = 1, isCodeGenOnly = 1 in + def F2_sfsubwchain : T_MInstFloat < "sfsub", 0b000, 0b001>; def: Pat<(f32 (fadd F32:$src1, F32:$src2)), (F2_sfadd F32:$src1, F32:$src2)>; @@ -185,6 +192,15 @@ def: Pat<(f32 (fmul F32:$src1, F32:$src2)), (F2_sfmpy F32:$src1, 
F32:$src2)>; +def: Pat<(f32 (faddwchain F32:$src1, F32:$src2)), + (F2_sfaddwchain F32:$src1, F32:$src2)>; + +def: Pat<(f32 (fsubwchain F32:$src1, F32:$src2)), + (F2_sfsubwchain F32:$src1, F32:$src2)>; + +def: Pat<(f32 (fmulwchain F32:$src1, F32:$src2)), + (F2_sfmpywchain F32:$src1, F32:$src2)>; + let Itinerary = M_tc_3x_SLOT23 in { def F2_sfmax : T_MInstFloat < "sfmax", 0b100, 0b000>; def F2_sfmin : T_MInstFloat < "sfmin", 0b100, 0b001>; Index: lib/Target/Mips/MipsISelLowering.cpp =================================================================== --- lib/Target/Mips/MipsISelLowering.cpp +++ lib/Target/Mips/MipsISelLowering.cpp @@ -373,6 +373,8 @@ setOperationAction(ISD::FMA, MVT::f64, Expand); setOperationAction(ISD::FREM, MVT::f32, Expand); setOperationAction(ISD::FREM, MVT::f64, Expand); + setOperationAction(ISD::FREM_W_CHAIN, MVT::f32, Expand); + setOperationAction(ISD::FREM_W_CHAIN, MVT::f64, Expand); // Lower f16 conversion operations into library calls setOperationAction(ISD::FP16_TO_FP, MVT::f32, Expand); Index: lib/Target/Mips/MipsInstrFPU.td =================================================================== --- lib/Target/Mips/MipsInstrFPU.td +++ lib/Target/Mips/MipsInstrFPU.td @@ -462,11 +462,34 @@ ADDS_FM<0x01, 16>; defm FSUB : ADDS_M<"sub.d", II_SUB_D, 0, fsub>, ADDS_FM<0x01, 17>; +let isCodeGenOnly = 1 in { + def FADD_S_FPE : MMRel, ADDS_FT<"add.s", FGR32Opnd, II_ADD_S, 1, + faddwchain>; + defm FADD_FPE : ADDS_M<"add.d", II_ADD_D, 1, faddwchain>; + + def FDIV_S_FPE : MMRel, ADDS_FT<"div.s", FGR32Opnd, II_DIV_S, 0, + fdivwchain>; + defm FDIV_FPE : ADDS_M<"div.d", II_DIV_D, 0, fdivwchain>; + + def FMUL_S_FPE : MMRel, ADDS_FT<"mul.s", FGR32Opnd, II_MUL_S, 1, + fmulwchain>; + defm FMUL_FPE : ADDS_M<"mul.d", II_MUL_D, 1, fmulwchain>; + + def FSUB_S_FPE : MMRel, ADDS_FT<"sub.s", FGR32Opnd, II_SUB_S, 0, + fsubwchain>; + defm FSUB_FPE : ADDS_M<"sub.d", II_SUB_D, 0, fsubwchain>; +} + def MADD_S : MMRel, MADDS_FT<"madd.s", FGR32Opnd, II_MADD_S, fadd>, 
MADDS_FM<4, 0>, INSN_MIPS4_32R2_NOT_32R6_64R6; def MSUB_S : MMRel, MADDS_FT<"msub.s", FGR32Opnd, II_MSUB_S, fsub>, MADDS_FM<5, 0>, INSN_MIPS4_32R2_NOT_32R6_64R6; +let isCodeGenOnly = 1 in { + def MADD_S_FPE : MADDS_FT<"madd.s", FGR32Opnd, II_MADD_S, faddwchain>; + def MSUB_S_FPE : MMRel, MADDS_FT<"msub.s", FGR32Opnd, II_MSUB_S, fsubwchain>; +} + let AdditionalPredicates = [NoNaNsFPMath] in { def NMADD_S : MMRel, NMADDS_FT<"nmadd.s", FGR32Opnd, II_NMADD_S, fadd>, MADDS_FM<6, 0>, INSN_MIPS4_32R2_NOT_32R6_64R6; Index: lib/Target/NVPTX/NVPTXInstrInfo.td =================================================================== --- lib/Target/NVPTX/NVPTXInstrInfo.td +++ lib/Target/NVPTX/NVPTXInstrInfo.td @@ -693,10 +693,16 @@ defm FADD : F3<"add", fadd>; defm FSUB : F3<"sub", fsub>; defm FMUL : F3<"mul", fmul>; +defm FADD_FPE : F3<"add", faddwchain>; +defm FSUB_FPE : F3<"sub", fsubwchain>; +defm FMUL_FPE : F3<"mul", fmulwchain>; defm FADD_rn : F3_rn<"add", fadd>; defm FSUB_rn : F3_rn<"sub", fsub>; defm FMUL_rn : F3_rn<"mul", fmul>; +defm FADD_rn_FPE : F3_rn<"add", faddwchain>; +defm FSUB_rn_FPE : F3_rn<"sub", fsubwchain>; +defm FMUL_rn_FPE : F3_rn<"mul", fmulwchain>; defm FABS : F2<"abs", fabs>; defm FNEG : F2<"neg", fneg>; @@ -705,21 +711,25 @@ // // F64 division // -def FDIV641r : NVPTXInst<(outs Float64Regs:$dst), - (ins f64imm:$a, Float64Regs:$b), - "rcp.rn.f64 \t$dst, $b;", - [(set Float64Regs:$dst, - (fdiv DoubleConst1:$a, Float64Regs:$b))]>; -def FDIV64rr : NVPTXInst<(outs Float64Regs:$dst), - (ins Float64Regs:$a, Float64Regs:$b), - "div.rn.f64 \t$dst, $a, $b;", - [(set Float64Regs:$dst, - (fdiv Float64Regs:$a, Float64Regs:$b))]>; -def FDIV64ri : NVPTXInst<(outs Float64Regs:$dst), - (ins Float64Regs:$a, f64imm:$b), - "div.rn.f64 \t$dst, $a, $b;", - [(set Float64Regs:$dst, - (fdiv Float64Regs:$a, fpimm:$b))]>; +multiclass FDIV64 { + def 1r : NVPTXInst<(outs Float64Regs:$dst), + (ins f64imm:$a, Float64Regs:$b), + "rcp.rn.f64 \t$dst, $b;", + [(set Float64Regs:$dst, + 
(OpNode DoubleConst1:$a, Float64Regs:$b))]>; + def rr : NVPTXInst<(outs Float64Regs:$dst), + (ins Float64Regs:$a, Float64Regs:$b), + "div.rn.f64 \t$dst, $a, $b;", + [(set Float64Regs:$dst, + (OpNode Float64Regs:$a, Float64Regs:$b))]>; + def ri : NVPTXInst<(outs Float64Regs:$dst), + (ins Float64Regs:$a, f64imm:$b), + "div.rn.f64 \t$dst, $a, $b;", + [(set Float64Regs:$dst, + (OpNode Float64Regs:$a, fpimm:$b))]>; +} +defm FDIV64 : FDIV64; +defm : FDIV64; // // F32 Approximate reciprocal @@ -825,30 +835,34 @@ // // F32 Accurate division // -def FDIV32rr_prec_ftz : NVPTXInst<(outs Float32Regs:$dst), - (ins Float32Regs:$a, Float32Regs:$b), - "div.rn.ftz.f32 \t$dst, $a, $b;", - [(set Float32Regs:$dst, - (fdiv Float32Regs:$a, Float32Regs:$b))]>, - Requires<[doF32FTZ, reqPTX20]>; -def FDIV32ri_prec_ftz : NVPTXInst<(outs Float32Regs:$dst), - (ins Float32Regs:$a, f32imm:$b), - "div.rn.ftz.f32 \t$dst, $a, $b;", - [(set Float32Regs:$dst, - (fdiv Float32Regs:$a, fpimm:$b))]>, - Requires<[doF32FTZ, reqPTX20]>; -def FDIV32rr_prec : NVPTXInst<(outs Float32Regs:$dst), - (ins Float32Regs:$a, Float32Regs:$b), - "div.rn.f32 \t$dst, $a, $b;", - [(set Float32Regs:$dst, - (fdiv Float32Regs:$a, Float32Regs:$b))]>, - Requires<[reqPTX20]>; -def FDIV32ri_prec : NVPTXInst<(outs Float32Regs:$dst), - (ins Float32Regs:$a, f32imm:$b), - "div.rn.f32 \t$dst, $a, $b;", - [(set Float32Regs:$dst, - (fdiv Float32Regs:$a, fpimm:$b))]>, - Requires<[reqPTX20]>; +multiclass FDIV32 { + def rr_prec_ftz : NVPTXInst<(outs Float32Regs:$dst), + (ins Float32Regs:$a, Float32Regs:$b), + "div.rn.ftz.f32 \t$dst, $a, $b;", + [(set Float32Regs:$dst, + (OpNode Float32Regs:$a, Float32Regs:$b))]>, + Requires<[doF32FTZ, reqPTX20]>; + def ri_prec_ftz : NVPTXInst<(outs Float32Regs:$dst), + (ins Float32Regs:$a, f32imm:$b), + "div.rn.ftz.f32 \t$dst, $a, $b;", + [(set Float32Regs:$dst, + (OpNode Float32Regs:$a, fpimm:$b))]>, + Requires<[doF32FTZ, reqPTX20]>; + def rr_prec : NVPTXInst<(outs Float32Regs:$dst), + (ins 
Float32Regs:$a, Float32Regs:$b), + "div.rn.f32 \t$dst, $a, $b;", + [(set Float32Regs:$dst, + (OpNode Float32Regs:$a, Float32Regs:$b))]>, + Requires<[reqPTX20]>; + def ri_prec : NVPTXInst<(outs Float32Regs:$dst), + (ins Float32Regs:$a, f32imm:$b), + "div.rn.f32 \t$dst, $a, $b;", + [(set Float32Regs:$dst, + (OpNode Float32Regs:$a, fpimm:$b))]>, + Requires<[reqPTX20]>; +} +defm FDIV32 : FDIV32; +defm : FDIV32; // // F32 rsqrt Index: lib/Target/PowerPC/PPCISelLowering.cpp =================================================================== --- lib/Target/PowerPC/PPCISelLowering.cpp +++ lib/Target/PowerPC/PPCISelLowering.cpp @@ -142,6 +142,7 @@ setOperationAction(ISD::FRINT, MVT::ppcf128, Expand); setOperationAction(ISD::FNEARBYINT, MVT::ppcf128, Expand); setOperationAction(ISD::FREM, MVT::ppcf128, Expand); + setOperationAction(ISD::FREM_W_CHAIN, MVT::ppcf128, Expand); // PowerPC has no SREM/UREM instructions setOperationAction(ISD::SREM, MVT::i32, Expand); @@ -164,12 +165,14 @@ setOperationAction(ISD::FCOS , MVT::f64, Expand); setOperationAction(ISD::FSINCOS, MVT::f64, Expand); setOperationAction(ISD::FREM , MVT::f64, Expand); + setOperationAction(ISD::FREM_W_CHAIN, MVT::f64, Expand); setOperationAction(ISD::FPOW , MVT::f64, Expand); setOperationAction(ISD::FMA , MVT::f64, Legal); setOperationAction(ISD::FSIN , MVT::f32, Expand); setOperationAction(ISD::FCOS , MVT::f32, Expand); setOperationAction(ISD::FSINCOS, MVT::f32, Expand); setOperationAction(ISD::FREM , MVT::f32, Expand); + setOperationAction(ISD::FREM_W_CHAIN, MVT::f32, Expand); setOperationAction(ISD::FPOW , MVT::f32, Expand); setOperationAction(ISD::FMA , MVT::f32, Legal); @@ -445,7 +448,9 @@ setOperationAction(ISD::UDIV, VT, Expand); setOperationAction(ISD::UREM, VT, Expand); setOperationAction(ISD::FDIV, VT, Expand); + setOperationAction(ISD::FDIV_W_CHAIN, VT, Expand); setOperationAction(ISD::FREM, VT, Expand); + setOperationAction(ISD::FREM_W_CHAIN, VT, Expand); setOperationAction(ISD::FNEG, VT, Expand); 
setOperationAction(ISD::FSQRT, VT, Expand); setOperationAction(ISD::FLOG, VT, Expand); @@ -520,6 +525,7 @@ if (TM.Options.UnsafeFPMath || Subtarget.hasVSX()) { setOperationAction(ISD::FDIV, MVT::v4f32, Legal); + setOperationAction(ISD::FDIV_W_CHAIN, MVT::v4f32, Legal); setOperationAction(ISD::FSQRT, MVT::v4f32, Legal); } @@ -576,6 +582,7 @@ setOperationAction(ISD::FMA, MVT::v2f64, Legal); setOperationAction(ISD::FDIV, MVT::v2f64, Legal); + setOperationAction(ISD::FDIV_W_CHAIN, MVT::v2f64, Legal); setOperationAction(ISD::FSQRT, MVT::v2f64, Legal); setOperationAction(ISD::VSELECT, MVT::v16i8, Legal); @@ -652,10 +659,14 @@ } if (Subtarget.hasQPX()) { - setOperationAction(ISD::FADD, MVT::v4f64, Legal); - setOperationAction(ISD::FSUB, MVT::v4f64, Legal); - setOperationAction(ISD::FMUL, MVT::v4f64, Legal); - setOperationAction(ISD::FREM, MVT::v4f64, Expand); + setOperationAction(ISD::FADD, MVT::v4f64, Legal); + setOperationAction(ISD::FADD_W_CHAIN, MVT::v4f64, Legal); + setOperationAction(ISD::FSUB, MVT::v4f64, Legal); + setOperationAction(ISD::FSUB_W_CHAIN, MVT::v4f64, Legal); + setOperationAction(ISD::FMUL, MVT::v4f64, Legal); + setOperationAction(ISD::FMUL_W_CHAIN, MVT::v4f64, Legal); + setOperationAction(ISD::FREM, MVT::v4f64, Expand); + setOperationAction(ISD::FREM_W_CHAIN, MVT::v4f64, Expand); setOperationAction(ISD::FCOPYSIGN, MVT::v4f64, Legal); setOperationAction(ISD::FGETSIGN, MVT::v4f64, Expand); @@ -705,10 +716,14 @@ addRegisterClass(MVT::v4f64, &PPC::QFRCRegClass); - setOperationAction(ISD::FADD, MVT::v4f32, Legal); - setOperationAction(ISD::FSUB, MVT::v4f32, Legal); - setOperationAction(ISD::FMUL, MVT::v4f32, Legal); - setOperationAction(ISD::FREM, MVT::v4f32, Expand); + setOperationAction(ISD::FADD, MVT::v4f32, Legal); + setOperationAction(ISD::FADD_W_CHAIN, MVT::v4f32, Legal); + setOperationAction(ISD::FSUB, MVT::v4f32, Legal); + setOperationAction(ISD::FSUB_W_CHAIN, MVT::v4f32, Legal); + setOperationAction(ISD::FMUL, MVT::v4f32, Legal); + 
setOperationAction(ISD::FMUL_W_CHAIN, MVT::v4f32, Legal); + setOperationAction(ISD::FREM, MVT::v4f32, Expand); + setOperationAction(ISD::FREM_W_CHAIN, MVT::v4f32, Expand); setOperationAction(ISD::FCOPYSIGN, MVT::v4f32, Legal); setOperationAction(ISD::FGETSIGN, MVT::v4f32, Expand); @@ -794,15 +809,19 @@ if (TM.Options.UnsafeFPMath) { setOperationAction(ISD::FDIV, MVT::v4f64, Legal); + setOperationAction(ISD::FDIV_W_CHAIN, MVT::v4f64, Legal); setOperationAction(ISD::FSQRT, MVT::v4f64, Legal); setOperationAction(ISD::FDIV, MVT::v4f32, Legal); + setOperationAction(ISD::FDIV_W_CHAIN, MVT::v4f32, Legal); setOperationAction(ISD::FSQRT, MVT::v4f32, Legal); } else { setOperationAction(ISD::FDIV, MVT::v4f64, Expand); + setOperationAction(ISD::FDIV_W_CHAIN, MVT::v4f64, Expand); setOperationAction(ISD::FSQRT, MVT::v4f64, Expand); setOperationAction(ISD::FDIV, MVT::v4f32, Expand); + setOperationAction(ISD::FDIV_W_CHAIN, MVT::v4f32, Expand); setOperationAction(ISD::FSQRT, MVT::v4f32, Expand); } } @@ -860,6 +879,7 @@ // Use reciprocal estimates. if (TM.Options.UnsafeFPMath) { setTargetDAGCombine(ISD::FDIV); + setTargetDAGCombine(ISD::FDIV_W_CHAIN); setTargetDAGCombine(ISD::FSQRT); } Index: lib/Target/PowerPC/PPCInstrAltivec.td =================================================================== --- lib/Target/PowerPC/PPCInstrAltivec.td +++ lib/Target/PowerPC/PPCInstrAltivec.td @@ -454,10 +454,14 @@ let PPC970_Unit = 5 in { // VALU Operations. // VA-Form instructions. 3-input AltiVec ops. 
let isCommutable = 1 in { -def VMADDFP : VAForm_1<46, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vC, vrrc:$vB), - "vmaddfp $vD, $vA, $vC, $vB", IIC_VecFP, - [(set v4f32:$vD, - (fma v4f32:$vA, v4f32:$vC, v4f32:$vB))]>; +class VMADDFP : VAForm_1<46, (outs vrrc:$vD), + (ins vrrc:$vA, vrrc:$vC, vrrc:$vB), + "vmaddfp $vD, $vA, $vC, $vB", IIC_VecFP, + [(set v4f32:$vD, + (fma v4f32:$vA, v4f32:$vC, v4f32:$vB))]>; +def VMADDFP : VMADDFP; +let hasSideEffects = 1, isCodeGenOnly = 1 in +def VMADDFP_FPE : VMADDFP; // FIXME: The fma+fneg pattern won't match because fneg is not legal. def VNMSUBFP: VAForm_1<47, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vC, vrrc:$vB), @@ -483,10 +487,15 @@ // VX-Form instructions. AltiVec arithmetic ops. let isCommutable = 1 in { -def VADDFP : VXForm_1<10, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), - "vaddfp $vD, $vA, $vB", IIC_VecFP, - [(set v4f32:$vD, (fadd v4f32:$vA, v4f32:$vB))]>; - +class VADDFP : VXForm_1<10, (outs vrrc:$vD), + (ins vrrc:$vA, vrrc:$vB), + "vaddfp $vD, $vA, $vB", IIC_VecFP, + [(set v4f32:$vD, + (OpNode v4f32:$vA, v4f32:$vB))]>; +def VADDFP : VADDFP; +let isCodeGenOnly = 1 in +def VADDFP_FPE : VADDFP; + def VADDUBM : VXForm_1<0, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), "vaddubm $vD, $vA, $vB", IIC_VecGeneral, [(set v16i8:$vD, (add v16i8:$vA, v16i8:$vB))]>; @@ -640,9 +649,14 @@ def VSUBCUW : VX1_Int_Ty<1408, "vsubcuw", int_ppc_altivec_vsubcuw, v4i32>; -def VSUBFP : VXForm_1<74, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), - "vsubfp $vD, $vA, $vB", IIC_VecGeneral, - [(set v4f32:$vD, (fsub v4f32:$vA, v4f32:$vB))]>; +class VSUBFP : VXForm_1<74, (outs vrrc:$vD), + (ins vrrc:$vA, vrrc:$vB), + "vsubfp $vD, $vA, $vB", IIC_VecGeneral, + [(set v4f32:$vD, + (OpNode v4f32:$vA, v4f32:$vB))]>; +def VSUBFP : VSUBFP; +let isCodeGenOnly = 1 in +def : VSUBFP; def VSUBUBM : VXForm_1<1024, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), "vsububm $vD, $vA, $vB", IIC_VecGeneral, [(set v16i8:$vD, (sub v16i8:$vA, v16i8:$vB))]>; @@ -948,7 +962,10 @@ def : Pat<(fmul 
v4f32:$vA, v4f32:$vB), (VMADDFP $vA, $vB, - (v4i32 (VSLW (V_SETALLONES), (V_SETALLONES))))>; + (v4i32 (VSLW (V_SETALLONES), (V_SETALLONES))))>; +def : Pat<(fmulwchain v4f32:$vA, v4f32:$vB), + (VMADDFP_FPE $vA, $vB, + (v4i32 (VSLW (V_SETALLONES), (V_SETALLONES))))>; // Fused multiply add and multiply sub for packed float. These are represented // separately from the real instructions above, for operations that must have Index: lib/Target/PowerPC/PPCInstrInfo.td =================================================================== --- lib/Target/PowerPC/PPCInstrInfo.td +++ lib/Target/PowerPC/PPCInstrInfo.td @@ -2494,41 +2494,85 @@ [(set f32:$FRT, (PPCfsel f64:$FRA, f32:$FRC, f32:$FRB))]>; let Uses = [RM] in { let isCommutable = 1 in { - defm FADD : AForm_2r<63, 21, + multiclass FADD : + AForm_2r<63, 21, (outs f8rc:$FRT), (ins f8rc:$FRA, f8rc:$FRB), - "fadd", "$FRT, $FRA, $FRB", IIC_FPAddSub, - [(set f64:$FRT, (fadd f64:$FRA, f64:$FRB))]>; - defm FADDS : AForm_2r<59, 21, + asmbase, "$FRT, $FRA, $FRB", IIC_FPAddSub, + [(set f64:$FRT, (OpCode f64:$FRA, f64:$FRB))]>; + multiclass FADDS : + AForm_2r<59, 21, (outs f4rc:$FRT), (ins f4rc:$FRA, f4rc:$FRB), - "fadds", "$FRT, $FRA, $FRB", IIC_FPGeneral, - [(set f32:$FRT, (fadd f32:$FRA, f32:$FRB))]>; + asmbase, "$FRT, $FRA, $FRB", IIC_FPGeneral, + [(set f32:$FRT, (OpCode f32:$FRA, f32:$FRB))]>; + + defm FADD : FADD; + defm FADDS : FADDS; + + let isCodeGenOnly = 1, hasSideEffects = 1 in { + defm FADD_FPE : FADD; + defm FADDS_FPE : FADDS; + } + } // isCommutable - defm FDIV : AForm_2r<63, 18, + multiclass FDIV : + AForm_2r<63, 18, (outs f8rc:$FRT), (ins f8rc:$FRA, f8rc:$FRB), - "fdiv", "$FRT, $FRA, $FRB", IIC_FPDivD, - [(set f64:$FRT, (fdiv f64:$FRA, f64:$FRB))]>; - defm FDIVS : AForm_2r<59, 18, + asmbase, "$FRT, $FRA, $FRB", IIC_FPDivD, + [(set f64:$FRT, (OpCode f64:$FRA, f64:$FRB))]>; + multiclass FDIVS : + AForm_2r<59, 18, (outs f4rc:$FRT), (ins f4rc:$FRA, f4rc:$FRB), - "fdivs", "$FRT, $FRA, $FRB", IIC_FPDivS, - [(set f32:$FRT, 
(fdiv f32:$FRA, f32:$FRB))]>; + asmbase, "$FRT, $FRA, $FRB", IIC_FPDivS, + [(set f32:$FRT, (OpCode f32:$FRA, f32:$FRB))]>; + + defm FDIV : FDIV; + defm FDIVS : FDIVS; + + let isCodeGenOnly = 1, hasSideEffects = 1 in { + defm FDIV_FPE : FDIV; + defm FDIVS_FPE : FDIVS; + } + let isCommutable = 1 in { - defm FMUL : AForm_3r<63, 25, + multiclass FMUL : + AForm_3r<63, 25, (outs f8rc:$FRT), (ins f8rc:$FRA, f8rc:$FRC), - "fmul", "$FRT, $FRA, $FRC", IIC_FPFused, - [(set f64:$FRT, (fmul f64:$FRA, f64:$FRC))]>; - defm FMULS : AForm_3r<59, 25, + asmbase, "$FRT, $FRA, $FRC", IIC_FPFused, + [(set f64:$FRT, (OpCode f64:$FRA, f64:$FRC))]>; + multiclass FMULS : + AForm_3r<59, 25, (outs f4rc:$FRT), (ins f4rc:$FRA, f4rc:$FRC), - "fmuls", "$FRT, $FRA, $FRC", IIC_FPGeneral, - [(set f32:$FRT, (fmul f32:$FRA, f32:$FRC))]>; + asmbase, "$FRT, $FRA, $FRC", IIC_FPGeneral, + [(set f32:$FRT, (OpCode f32:$FRA, f32:$FRC))]>; + + defm FMUL : FMUL; + defm FMULS : FMULS; + + let isCodeGenOnly = 1, hasSideEffects = 1 in { + defm FMUL_FPE : FMUL; + defm FMULS_FPE : FMULS; + } + } // isCommutable - defm FSUB : AForm_2r<63, 20, + multiclass FSUB : + AForm_2r<63, 20, (outs f8rc:$FRT), (ins f8rc:$FRA, f8rc:$FRB), - "fsub", "$FRT, $FRA, $FRB", IIC_FPAddSub, - [(set f64:$FRT, (fsub f64:$FRA, f64:$FRB))]>; - defm FSUBS : AForm_2r<59, 20, + asmbase, "$FRT, $FRA, $FRB", IIC_FPAddSub, + [(set f64:$FRT, (OpCode f64:$FRA, f64:$FRB))]>; + multiclass FSUBS : + AForm_2r<59, 20, (outs f4rc:$FRT), (ins f4rc:$FRA, f4rc:$FRB), - "fsubs", "$FRT, $FRA, $FRB", IIC_FPGeneral, - [(set f32:$FRT, (fsub f32:$FRA, f32:$FRB))]>; + asmbase, "$FRT, $FRA, $FRB", IIC_FPGeneral, + [(set f32:$FRT, (OpCode f32:$FRA, f32:$FRB))]>; + + defm FSUB : FSUB; + defm FSUBS : FSUBS; + + let isCodeGenOnly = 1, hasSideEffects = 1 in { + defm FSUB_FPE : FSUB; + defm FSUBS_FPE : FSUBS; + } + } } Index: lib/Target/PowerPC/PPCInstrQPX.td =================================================================== --- lib/Target/PowerPC/PPCInstrQPX.td +++ 
lib/Target/PowerPC/PPCInstrQPX.td @@ -803,6 +803,46 @@ } } // neverHasSideEffects + +let isCodeGenOnly = 1 in { + // Add Instructions + let isCommutable = 1 in { + def QVFADD_FPE : AForm_2<4, 21, + (outs qfrc:$FRT), (ins qfrc:$FRA, qfrc:$FRB), + "qvfadd $FRT, $FRA, $FRB", IIC_FPGeneral, + [(set v4f64:$FRT, + (faddwchain v4f64:$FRA, v4f64:$FRB))]>; + def QVFADDSs_FPE : AForm_2<0, 21, + (outs qsrc:$FRT), (ins qsrc:$FRA, qsrc:$FRB), + "qvfadds $FRT, $FRA, $FRB", IIC_FPGeneral, + [(set v4f32:$FRT, + (faddwchain v4f32:$FRA, v4f32:$FRB))]>; + } + def QVFSUB_FPE : AForm_2<4, 20, + (outs qfrc:$FRT), (ins qfrc:$FRA, qfrc:$FRB), + "qvfsub $FRT, $FRA, $FRB", IIC_FPGeneral, + [(set v4f64:$FRT, + (fsubwchain v4f64:$FRA, v4f64:$FRB))]>; + def QVFSUBSs_FPE : AForm_2<0, 20, + (outs qsrc:$FRT), (ins qsrc:$FRA, qsrc:$FRB), + "qvfsubs $FRT, $FRA, $FRB", IIC_FPGeneral, + [(set v4f32:$FRT, + (fsubwchain v4f32:$FRA, v4f32:$FRB))]>; + // Multiply Instructions + let isCommutable = 1 in { + def QVFMUL_FPE : AForm_3<4, 25, + (outs qfrc:$FRT), (ins qfrc:$FRA, qfrc:$FRC), + "qvfmul $FRT, $FRA, $FRC", IIC_FPGeneral, + [(set v4f64:$FRT, + (fmulwchain v4f64:$FRA, v4f64:$FRC))]>; + def QVFMULSs_FPE : AForm_3<0, 25, + (outs qsrc:$FRT), (ins qsrc:$FRA, qsrc:$FRC), + "qvfmuls $FRT, $FRA, $FRC", IIC_FPGeneral, + [(set v4f32:$FRT, + (fmulwchain v4f32:$FRA, v4f32:$FRC))]>; + } +} + } def : InstAlias<"qvfclr $FRT", Index: lib/Target/PowerPC/PPCInstrVSX.td =================================================================== --- lib/Target/PowerPC/PPCInstrVSX.td +++ lib/Target/PowerPC/PPCInstrVSX.td @@ -778,6 +778,82 @@ "xxspltw $XT, $XB, $UIM", IIC_VecPerm, []>; } // hasSideEffects +// Add/Mul Instructions +let isCommutable = 1 in { + let isCodeGenOnly = 1 in { + def XSADDDP_FPE : XX3Form<60, 32, + (outs vsfrc:$XT), (ins vsfrc:$XA, vsfrc:$XB), + "xsadddp $XT, $XA, $XB", IIC_VecFP, + [(set f64:$XT, (faddwchain f64:$XA, f64:$XB))]>; + def XSMULDP_FPE : XX3Form<60, 48, + (outs vsfrc:$XT), (ins vsfrc:$XA, 
vsfrc:$XB), + "xsmuldp $XT, $XA, $XB", IIC_VecFP, + [(set f64:$XT, (fmulwchain f64:$XA, f64:$XB))]>; + + def XVADDDP_FPE : XX3Form<60, 96, + (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB), + "xvadddp $XT, $XA, $XB", IIC_VecFP, + [(set v2f64:$XT, + (faddwchain v2f64:$XA, v2f64:$XB))]>; + + def XVADDSP_FPE : XX3Form<60, 64, + (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB), + "xvaddsp $XT, $XA, $XB", IIC_VecFP, + [(set v4f32:$XT, + (faddwchain v4f32:$XA, v4f32:$XB))]>; + + def XVMULDP_FPE : XX3Form<60, 112, + (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB), + "xvmuldp $XT, $XA, $XB", IIC_VecFP, + [(set v2f64:$XT, + (fmulwchain v2f64:$XA, v2f64:$XB))]>; + + def XVMULSP_FPE : XX3Form<60, 80, + (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB), + "xvmulsp $XT, $XA, $XB", IIC_VecFP, + [(set v4f32:$XT, + (fmulwchain v4f32:$XA, v4f32:$XB))]>; + } +} + +// Subtract Instructions +let isCodeGenOnly = 1 in { + def XSSUBDP_FPE : XX3Form<60, 40, + (outs vsfrc:$XT), (ins vsfrc:$XA, vsfrc:$XB), + "xssubdp $XT, $XA, $XB", IIC_VecFP, + [(set f64:$XT, (fsubwchain f64:$XA, f64:$XB))]>; + + def XVSUBDP_FPE : XX3Form<60, 104, + (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB), + "xvsubdp $XT, $XA, $XB", IIC_VecFP, + [(set v2f64:$XT, + (fsubwchain v2f64:$XA, v2f64:$XB))]>; + def XVSUBSP_FPE : XX3Form<60, 72, + (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB), + "xvsubsp $XT, $XA, $XB", IIC_VecFP, + [(set v4f32:$XT, + (fsubwchain v4f32:$XA, v4f32:$XB))]>; +} + +// Division Instructions +let isCodeGenOnly = 1 in { + def XSDIVDP_FPE : XX3Form<60, 56, + (outs vsfrc:$XT), (ins vsfrc:$XA, vsfrc:$XB), + "xsdivdp $XT, $XA, $XB", IIC_FPDivD, + [(set f64:$XT, (fdivwchain f64:$XA, f64:$XB))]>; + + def XVDIVDP_FPE : XX3Form<60, 120, + (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB), + "xvdivdp $XT, $XA, $XB", IIC_FPDivD, + [(set v2f64:$XT, + (fdivwchain v2f64:$XA, v2f64:$XB))]>; + def XVDIVSP_FPE : XX3Form<60, 88, + (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB), + "xvdivsp $XT, $XA, $XB", IIC_FPDivS, + [(set v4f32:$XT, + (fdivwchain v4f32:$XA, 
v4f32:$XB))]>; +} + // SELECT_CC_* - Used to implement the SELECT_CC DAG operation. Expanded after // instruction selection into a branch sequence. let usesCustomInserter = 1, // Expanded after instruction selection. @@ -1098,6 +1174,19 @@ (outs vssrc:$XT), (ins vssrc:$XA, vssrc:$XB), "xsmulsp $XT, $XA, $XB", IIC_VecFP, [(set f32:$XT, (fmul f32:$XA, f32:$XB))]>; + + let isCodeGenOnly = 1 in { + def XSADDSP_FPE : XX3Form<60, 0, + (outs vssrc:$XT), (ins vssrc:$XA, vssrc:$XB), + "xsaddsp $XT, $XA, $XB", IIC_VecFP, + [(set f32:$XT, + (faddwchain f32:$XA, f32:$XB))]>; + def XSMULSP_FPE : XX3Form<60, 16, + (outs vssrc:$XT), (ins vssrc:$XA, vssrc:$XB), + "xsmulsp $XT, $XA, $XB", IIC_VecFP, + [(set f32:$XT, + (fmulwchain f32:$XA, f32:$XB))]>; + } } // isCommutable def XSDIVSP : XX3Form<60, 24, @@ -1121,6 +1210,17 @@ "xssubsp $XT, $XA, $XB", IIC_VecFP, [(set f32:$XT, (fsub f32:$XA, f32:$XB))]>; + let isCodeGenOnly = 1 in { + def XSDIVSP_FPE : XX3Form<60, 24, + (outs vssrc:$XT), (ins vssrc:$XA, vssrc:$XB), + "xsdivsp $XT, $XA, $XB", IIC_FPDivS, + [(set f32:$XT, (fdivwchain f32:$XA, f32:$XB))]>; + def XSSUBSP_FPE : XX3Form<60, 8, + (outs vssrc:$XT), (ins vssrc:$XA, vssrc:$XB), + "xssubsp $XT, $XA, $XB", IIC_VecFP, + [(set f32:$XT, (fsubwchain f32:$XA, f32:$XB))]>; + } + // FMA Instructions let BaseName = "XSMADDASP" in { let isCommutable = 1 in Index: lib/Target/Sparc/SparcISelLowering.cpp =================================================================== --- lib/Target/Sparc/SparcISelLowering.cpp +++ lib/Target/Sparc/SparcISelLowering.cpp @@ -1613,16 +1613,19 @@ setOperationAction(ISD::FCOS , MVT::f128, Expand); setOperationAction(ISD::FSINCOS, MVT::f128, Expand); setOperationAction(ISD::FREM , MVT::f128, Expand); + setOperationAction(ISD::FREM_W_CHAIN, MVT::f128, Expand); setOperationAction(ISD::FMA , MVT::f128, Expand); setOperationAction(ISD::FSIN , MVT::f64, Expand); setOperationAction(ISD::FCOS , MVT::f64, Expand); setOperationAction(ISD::FSINCOS, MVT::f64, Expand); 
setOperationAction(ISD::FREM , MVT::f64, Expand); + setOperationAction(ISD::FREM_W_CHAIN, MVT::f64, Expand); setOperationAction(ISD::FMA , MVT::f64, Expand); setOperationAction(ISD::FSIN , MVT::f32, Expand); setOperationAction(ISD::FCOS , MVT::f32, Expand); setOperationAction(ISD::FSINCOS, MVT::f32, Expand); setOperationAction(ISD::FREM , MVT::f32, Expand); + setOperationAction(ISD::FREM_W_CHAIN, MVT::f32, Expand); setOperationAction(ISD::FMA , MVT::f32, Expand); setOperationAction(ISD::CTTZ , MVT::i32, Expand); setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Expand); @@ -1688,13 +1691,17 @@ } if (Subtarget->hasHardQuad()) { - setOperationAction(ISD::FADD, MVT::f128, Legal); - setOperationAction(ISD::FSUB, MVT::f128, Legal); - setOperationAction(ISD::FMUL, MVT::f128, Legal); - setOperationAction(ISD::FDIV, MVT::f128, Legal); - setOperationAction(ISD::FSQRT, MVT::f128, Legal); - setOperationAction(ISD::FP_EXTEND, MVT::f128, Legal); - setOperationAction(ISD::FP_ROUND, MVT::f64, Legal); + setOperationAction(ISD::FADD, MVT::f128, Legal); + setOperationAction(ISD::FADD_W_CHAIN, MVT::f128, Legal); + setOperationAction(ISD::FSUB, MVT::f128, Legal); + setOperationAction(ISD::FSUB_W_CHAIN, MVT::f128, Legal); + setOperationAction(ISD::FMUL, MVT::f128, Legal); + setOperationAction(ISD::FMUL_W_CHAIN, MVT::f128, Legal); + setOperationAction(ISD::FDIV, MVT::f128, Legal); + setOperationAction(ISD::FDIV_W_CHAIN, MVT::f128, Legal); + setOperationAction(ISD::FSQRT, MVT::f128, Legal); + setOperationAction(ISD::FP_EXTEND, MVT::f128, Legal); + setOperationAction(ISD::FP_ROUND, MVT::f64, Legal); if (Subtarget->isV9()) { setOperationAction(ISD::FNEG, MVT::f128, Legal); setOperationAction(ISD::FABS, MVT::f128, Legal); @@ -1713,13 +1720,17 @@ } else { // Custom legalize f128 operations. 
- setOperationAction(ISD::FADD, MVT::f128, Custom); - setOperationAction(ISD::FSUB, MVT::f128, Custom); - setOperationAction(ISD::FMUL, MVT::f128, Custom); - setOperationAction(ISD::FDIV, MVT::f128, Custom); - setOperationAction(ISD::FSQRT, MVT::f128, Custom); - setOperationAction(ISD::FNEG, MVT::f128, Custom); - setOperationAction(ISD::FABS, MVT::f128, Custom); + setOperationAction(ISD::FADD, MVT::f128, Custom); + setOperationAction(ISD::FADD_W_CHAIN, MVT::f128, Custom); + setOperationAction(ISD::FSUB, MVT::f128, Custom); + setOperationAction(ISD::FSUB_W_CHAIN, MVT::f128, Custom); + setOperationAction(ISD::FMUL, MVT::f128, Custom); + setOperationAction(ISD::FMUL_W_CHAIN, MVT::f128, Custom); + setOperationAction(ISD::FDIV, MVT::f128, Custom); + setOperationAction(ISD::FDIV_W_CHAIN, MVT::f128, Custom); + setOperationAction(ISD::FSQRT, MVT::f128, Custom); + setOperationAction(ISD::FNEG, MVT::f128, Custom); + setOperationAction(ISD::FABS, MVT::f128, Custom); setOperationAction(ISD::FP_EXTEND, MVT::f128, Custom); setOperationAction(ISD::FP_ROUND, MVT::f64, Custom); @@ -2109,7 +2120,8 @@ SDValue Callee = DAG.getExternalSymbol(LibFuncName, PtrVT); Type *RetTy = Op.getValueType().getTypeForEVT(*DAG.getContext()); Type *RetTyABI = RetTy; - SDValue Chain = DAG.getEntryNode(); + bool hasChain = isFPOpWithChain(Op->getOpcode()); + SDValue Chain = (hasChain ? Op.getOperand(0) : DAG.getEntryNode()); SDValue RetPtr; if (RetTy->isFP128Ty()) { @@ -2127,7 +2139,7 @@ } assert(Op->getNumOperands() >= numArgs && "Not enough operands!"); - for (unsigned i = 0, e = numArgs; i != e; ++i) { + for (unsigned i = hasChain ? 
1 : 0, e = numArgs; i != e; ++i) { Chain = LowerF128_LibCallArg(Chain, Args, Op.getOperand(i), SDLoc(Op), DAG); } TargetLowering::CallLoweringInfo CLI(DAG); @@ -2948,12 +2960,20 @@ case ISD::STORE: return LowerSTORE(Op, DAG); case ISD::FADD: return LowerF128Op(Op, DAG, getLibcallName(RTLIB::ADD_F128), 2); + case ISD::FADD_W_CHAIN: return LowerF128Op(Op, DAG, + getLibcallName(RTLIB::ADD_F128), 3); case ISD::FSUB: return LowerF128Op(Op, DAG, getLibcallName(RTLIB::SUB_F128), 2); + case ISD::FSUB_W_CHAIN: return LowerF128Op(Op, DAG, + getLibcallName(RTLIB::SUB_F128), 3); case ISD::FMUL: return LowerF128Op(Op, DAG, getLibcallName(RTLIB::MUL_F128), 2); + case ISD::FMUL_W_CHAIN: return LowerF128Op(Op, DAG, + getLibcallName(RTLIB::MUL_F128), 3); case ISD::FDIV: return LowerF128Op(Op, DAG, getLibcallName(RTLIB::DIV_F128), 2); + case ISD::FDIV_W_CHAIN: return LowerF128Op(Op, DAG, + getLibcallName(RTLIB::DIV_F128), 3); case ISD::FSQRT: return LowerF128Op(Op, DAG, getLibcallName(RTLIB::SQRT_F128),1); case ISD::FABS: Index: lib/Target/Sparc/SparcInstrInfo.td =================================================================== --- lib/Target/Sparc/SparcInstrInfo.td +++ lib/Target/Sparc/SparcInstrInfo.td @@ -1010,75 +1010,103 @@ // Floating-point Add and Subtract Instructions, p. 
146 -def FADDS : F3_3<2, 0b110100, 0b001000001, - (outs FPRegs:$rd), (ins FPRegs:$rs1, FPRegs:$rs2), - "fadds $rs1, $rs2, $rd", - [(set f32:$rd, (fadd f32:$rs1, f32:$rs2))]>; -def FADDD : F3_3<2, 0b110100, 0b001000010, - (outs DFPRegs:$rd), (ins DFPRegs:$rs1, DFPRegs:$rs2), - "faddd $rs1, $rs2, $rd", - [(set f64:$rd, (fadd f64:$rs1, f64:$rs2))]>; -def FADDQ : F3_3<2, 0b110100, 0b001000011, - (outs QFPRegs:$rd), (ins QFPRegs:$rs1, QFPRegs:$rs2), - "faddq $rs1, $rs2, $rd", - [(set f128:$rd, (fadd f128:$rs1, f128:$rs2))]>, - Requires<[HasHardQuad]>; - -def FSUBS : F3_3<2, 0b110100, 0b001000101, - (outs FPRegs:$rd), (ins FPRegs:$rs1, FPRegs:$rs2), - "fsubs $rs1, $rs2, $rd", - [(set f32:$rd, (fsub f32:$rs1, f32:$rs2))]>; -def FSUBD : F3_3<2, 0b110100, 0b001000110, - (outs DFPRegs:$rd), (ins DFPRegs:$rs1, DFPRegs:$rs2), - "fsubd $rs1, $rs2, $rd", - [(set f64:$rd, (fsub f64:$rs1, f64:$rs2))]>; -def FSUBQ : F3_3<2, 0b110100, 0b001000111, - (outs QFPRegs:$rd), (ins QFPRegs:$rs1, QFPRegs:$rs2), - "fsubq $rs1, $rs2, $rd", - [(set f128:$rd, (fsub f128:$rs1, f128:$rs2))]>, - Requires<[HasHardQuad]>; +multiclass FADD { + def S : F3_3<2, 0b110100, 0b001000001, + (outs FPRegs:$rd), (ins FPRegs:$rs1, FPRegs:$rs2), + "fadds $rs1, $rs2, $rd", + [(set f32:$rd, (OpNode f32:$rs1, f32:$rs2))]>; + def D : F3_3<2, 0b110100, 0b001000010, + (outs DFPRegs:$rd), (ins DFPRegs:$rs1, DFPRegs:$rs2), + "faddd $rs1, $rs2, $rd", + [(set f64:$rd, (OpNode f64:$rs1, f64:$rs2))]>; + def Q : F3_3<2, 0b110100, 0b001000011, + (outs QFPRegs:$rd), (ins QFPRegs:$rs1, QFPRegs:$rs2), + "faddq $rs1, $rs2, $rd", + [(set f128:$rd, (OpNode f128:$rs1, f128:$rs2))]>, + Requires<[HasHardQuad]>; +} +defm FADD : FADD; +let isCodeGenOnly = 1 in + defm : FADD; + +multiclass FSUB { + def S : F3_3<2, 0b110100, 0b001000101, + (outs FPRegs:$rd), (ins FPRegs:$rs1, FPRegs:$rs2), + "fsubs $rs1, $rs2, $rd", + [(set f32:$rd, (OpNode f32:$rs1, f32:$rs2))]>; + def D : F3_3<2, 0b110100, 0b001000110, + (outs DFPRegs:$rd), (ins 
DFPRegs:$rs1, DFPRegs:$rs2), + "fsubd $rs1, $rs2, $rd", + [(set f64:$rd, (OpNode f64:$rs1, f64:$rs2))]>; + def Q : F3_3<2, 0b110100, 0b001000111, + (outs QFPRegs:$rd), (ins QFPRegs:$rs1, QFPRegs:$rs2), + "fsubq $rs1, $rs2, $rd", + [(set f128:$rd, (OpNode f128:$rs1, f128:$rs2))]>, + Requires<[HasHardQuad]>; +} +defm FSUB : FSUB; +let isCodeGenOnly = 1 in + defm : FSUB; // Floating-point Multiply and Divide Instructions, p. 147 -def FMULS : F3_3<2, 0b110100, 0b001001001, - (outs FPRegs:$rd), (ins FPRegs:$rs1, FPRegs:$rs2), - "fmuls $rs1, $rs2, $rd", - [(set f32:$rd, (fmul f32:$rs1, f32:$rs2))]>; -def FMULD : F3_3<2, 0b110100, 0b001001010, - (outs DFPRegs:$rd), (ins DFPRegs:$rs1, DFPRegs:$rs2), - "fmuld $rs1, $rs2, $rd", - [(set f64:$rd, (fmul f64:$rs1, f64:$rs2))]>; -def FMULQ : F3_3<2, 0b110100, 0b001001011, - (outs QFPRegs:$rd), (ins QFPRegs:$rs1, QFPRegs:$rs2), - "fmulq $rs1, $rs2, $rd", - [(set f128:$rd, (fmul f128:$rs1, f128:$rs2))]>, - Requires<[HasHardQuad]>; - -def FSMULD : F3_3<2, 0b110100, 0b001101001, - (outs DFPRegs:$rd), (ins FPRegs:$rs1, FPRegs:$rs2), - "fsmuld $rs1, $rs2, $rd", - [(set f64:$rd, (fmul (fextend f32:$rs1), - (fextend f32:$rs2)))]>; -def FDMULQ : F3_3<2, 0b110100, 0b001101110, - (outs QFPRegs:$rd), (ins DFPRegs:$rs1, DFPRegs:$rs2), - "fdmulq $rs1, $rs2, $rd", - [(set f128:$rd, (fmul (fextend f64:$rs1), - (fextend f64:$rs2)))]>, - Requires<[HasHardQuad]>; +multiclass FMUL { + def S : F3_3<2, 0b110100, 0b001001001, + (outs FPRegs:$rd), (ins FPRegs:$rs1, FPRegs:$rs2), + "fmuls $rs1, $rs2, $rd", + [(set f32:$rd, (OpNode f32:$rs1, f32:$rs2))]>; + def D : F3_3<2, 0b110100, 0b001001010, + (outs DFPRegs:$rd), (ins DFPRegs:$rs1, DFPRegs:$rs2), + "fmuld $rs1, $rs2, $rd", + [(set f64:$rd, (OpNode f64:$rs1, f64:$rs2))]>; + def Q : F3_3<2, 0b110100, 0b001001011, + (outs QFPRegs:$rd), (ins QFPRegs:$rs1, QFPRegs:$rs2), + "fmulq $rs1, $rs2, $rd", + [(set f128:$rd, (OpNode f128:$rs1, f128:$rs2))]>, + Requires<[HasHardQuad]>; +} +defm FMUL : FMUL; +let 
isCodeGenOnly = 1 in + defm : FMUL; + +class FSMULD : F3_3<2, 0b110100, 0b001101001, + (outs DFPRegs:$rd), + (ins FPRegs:$rs1, FPRegs:$rs2), + "fsmuld $rs1, $rs2, $rd", + [(set f64:$rd, (OpNode (fextend f32:$rs1), + (fextend f32:$rs2)))]>; +class FDMULQ : F3_3<2, 0b110100, 0b001101110, + (outs QFPRegs:$rd), + (ins DFPRegs:$rs1, DFPRegs:$rs2), + "fdmulq $rs1, $rs2, $rd", + [(set f128:$rd, (OpNode (fextend f64:$rs1), + (fextend f64:$rs2)))]>, + Requires<[HasHardQuad]>; +def FSMULD : FSMULD; +def FDMULQ : FDMULQ; +let isCodeGenOnly = 1 in { + def : FSMULD; + def : FDMULQ; +} -def FDIVS : F3_3<2, 0b110100, 0b001001101, - (outs FPRegs:$rd), (ins FPRegs:$rs1, FPRegs:$rs2), - "fdivs $rs1, $rs2, $rd", - [(set f32:$rd, (fdiv f32:$rs1, f32:$rs2))]>; -def FDIVD : F3_3<2, 0b110100, 0b001001110, - (outs DFPRegs:$rd), (ins DFPRegs:$rs1, DFPRegs:$rs2), - "fdivd $rs1, $rs2, $rd", - [(set f64:$rd, (fdiv f64:$rs1, f64:$rs2))]>; -def FDIVQ : F3_3<2, 0b110100, 0b001001111, - (outs QFPRegs:$rd), (ins QFPRegs:$rs1, QFPRegs:$rs2), - "fdivq $rs1, $rs2, $rd", - [(set f128:$rd, (fdiv f128:$rs1, f128:$rs2))]>, - Requires<[HasHardQuad]>; +multiclass FDIV { +def S : F3_3<2, 0b110100, 0b001001101, + (outs FPRegs:$rd), (ins FPRegs:$rs1, FPRegs:$rs2), + "fdivs $rs1, $rs2, $rd", + [(set f32:$rd, (OpNode f32:$rs1, f32:$rs2))]>; +def D : F3_3<2, 0b110100, 0b001001110, + (outs DFPRegs:$rd), (ins DFPRegs:$rs1, DFPRegs:$rs2), + "fdivd $rs1, $rs2, $rd", + [(set f64:$rd, (OpNode f64:$rs1, f64:$rs2))]>; +def Q : F3_3<2, 0b110100, 0b001001111, + (outs QFPRegs:$rd), (ins QFPRegs:$rs1, QFPRegs:$rs2), + "fdivq $rs1, $rs2, $rd", + [(set f128:$rd, (OpNode f128:$rs1, f128:$rs2))]>, + Requires<[HasHardQuad]>; +} +defm FDIV : FDIV; +let isCodeGenOnly = 1 in + defm : FDIV; // Floating-point Compare Instructions, p. 148 // Note: the 2nd template arg is different for these guys. 
Index: lib/Target/SystemZ/SystemZISelLowering.cpp =================================================================== --- lib/Target/SystemZ/SystemZISelLowering.cpp +++ lib/Target/SystemZ/SystemZISelLowering.cpp @@ -369,6 +369,7 @@ setOperationAction(ISD::FCOS, VT, Expand); setOperationAction(ISD::FSINCOS, VT, Expand); setOperationAction(ISD::FREM, VT, Expand); + setOperationAction(ISD::FREM_W_CHAIN, VT, Expand); setOperationAction(ISD::FPOW, VT, Expand); } } @@ -388,11 +389,15 @@ // These operations have direct equivalents. setOperationAction(ISD::FADD, MVT::v2f64, Legal); + setOperationAction(ISD::FADD_W_CHAIN, MVT::v2f64, Legal); setOperationAction(ISD::FNEG, MVT::v2f64, Legal); setOperationAction(ISD::FSUB, MVT::v2f64, Legal); + setOperationAction(ISD::FSUB_W_CHAIN, MVT::v2f64, Legal); setOperationAction(ISD::FMUL, MVT::v2f64, Legal); + setOperationAction(ISD::FMUL_W_CHAIN, MVT::v2f64, Legal); setOperationAction(ISD::FMA, MVT::v2f64, Legal); setOperationAction(ISD::FDIV, MVT::v2f64, Legal); + setOperationAction(ISD::FDIV_W_CHAIN, MVT::v2f64, Legal); setOperationAction(ISD::FABS, MVT::v2f64, Legal); setOperationAction(ISD::FSQRT, MVT::v2f64, Legal); setOperationAction(ISD::FRINT, MVT::v2f64, Legal); Index: lib/Target/SystemZ/SystemZInstrFP.td =================================================================== --- lib/Target/SystemZ/SystemZInstrFP.td +++ lib/Target/SystemZ/SystemZInstrFP.td @@ -358,6 +358,12 @@ def AEBR : BinaryRRE<"aeb", 0xB30A, fadd, FP32, FP32>; def ADBR : BinaryRRE<"adb", 0xB31A, fadd, FP64, FP64>; def AXBR : BinaryRRE<"axb", 0xB34A, fadd, FP128, FP128>; + + let isCodeGenOnly = 1 in { + def AEBR_FPE : BinaryRRE<"aeb", 0xB30A, faddwchain, FP32, FP32>; + def ADBR_FPE : BinaryRRE<"adb", 0xB31A, faddwchain, FP64, FP64>; + def AXBR_FPE : BinaryRRE<"axb", 0xB34A, faddwchain, FP128, FP128>; + } } def AEB : BinaryRXE<"aeb", 0xED0A, fadd, FP32, load, 4>; def ADB : BinaryRXE<"adb", 0xED1A, fadd, FP64, load, 8>; @@ -369,6 +375,12 @@ def SDBR : 
BinaryRRE<"sdb", 0xB31B, fsub, FP64, FP64>; def SXBR : BinaryRRE<"sxb", 0xB34B, fsub, FP128, FP128>; + let isCodeGenOnly = 1 in { + def SEBR_FPE : BinaryRRE<"seb", 0xB30B, fsubwchain, FP32, FP32>; + def SDBR_FPE : BinaryRRE<"sdb", 0xB31B, fsubwchain, FP64, FP64>; + def SXBR_FPE : BinaryRRE<"sxb", 0xB34B, fsubwchain, FP128, FP128>; + } + def SEB : BinaryRXE<"seb", 0xED0B, fsub, FP32, load, 4>; def SDB : BinaryRXE<"sdb", 0xED1B, fsub, FP64, load, 8>; } @@ -378,6 +390,12 @@ def MEEBR : BinaryRRE<"meeb", 0xB317, fmul, FP32, FP32>; def MDBR : BinaryRRE<"mdb", 0xB31C, fmul, FP64, FP64>; def MXBR : BinaryRRE<"mxb", 0xB34C, fmul, FP128, FP128>; + + let isCodeGenOnly = 1 in { + def MEEBR_FPE : BinaryRRE<"meeb", 0xB317, fmulwchain, FP32, FP32>; + def MDBR_FPE : BinaryRRE<"mdb", 0xB31C, fmulwchain, FP64, FP64>; + def MXBR_FPE : BinaryRRE<"mxb", 0xB34C, fmulwchain, FP128, FP128>; + } } def MEEB : BinaryRXE<"meeb", 0xED17, fmul, FP32, load, 4>; def MDB : BinaryRXE<"mdb", 0xED1C, fmul, FP64, load, 8>; @@ -430,6 +448,12 @@ def DEB : BinaryRXE<"deb", 0xED0D, fdiv, FP32, load, 4>; def DDB : BinaryRXE<"ddb", 0xED1D, fdiv, FP64, load, 8>; +let isCodeGenOnly = 1 in { + def DEBR_FPE : BinaryRRE<"deb", 0xB30D, fdivwchain, FP32, FP32>; + def DDBR_FPE : BinaryRRE<"ddb", 0xB31D, fdivwchain, FP64, FP64>; + def DXBR_FPE : BinaryRRE<"dxb", 0xB34D, fdivwchain, FP128, FP128>; +} + //===----------------------------------------------------------------------===// // Comparisons //===----------------------------------------------------------------------===// Index: lib/Target/SystemZ/SystemZInstrVector.td =================================================================== --- lib/Target/SystemZ/SystemZInstrVector.td +++ lib/Target/SystemZ/SystemZInstrVector.td @@ -805,6 +805,12 @@ // Add. 
def VFADB : BinaryVRRc<"vfadb", 0xE7E3, fadd, v128db, v128db, 3, 0>; def WFADB : BinaryVRRc<"wfadb", 0xE7E3, fadd, v64db, v64db, 3, 8>; + let isCodeGenOnly = 1 in { + def VFADB_FPE : + BinaryVRRc<"vfadb", 0xE7E3, faddwchain, v128db, v128db, 3, 0>; + def WFADB_FPE : + BinaryVRRc<"wfadb", 0xE7E3, faddwchain, v64db, v64db, 3, 8>; + } // Convert from fixed 64-bit. def VCDGB : TernaryVRRa<"vcdgb", 0xE7C3, null_frag, v128db, v128g, 3, 0>; @@ -831,6 +837,12 @@ // Divide. def VFDDB : BinaryVRRc<"vfddb", 0xE7E5, fdiv, v128db, v128db, 3, 0>; def WFDDB : BinaryVRRc<"wfddb", 0xE7E5, fdiv, v64db, v64db, 3, 8>; + let isCodeGenOnly = 1 in { + def VFDDB_FPE : + BinaryVRRc<"vfddb", 0xE7E5, fdivwchain, v128db, v128db, 3, 0>; + def WFDDB_FPE : + BinaryVRRc<"wfddb", 0xE7E5, fdivwchain, v64db, v64db, 3, 8>; + } // Load FP integer. def VFIDB : TernaryVRRa<"vfidb", 0xE7C7, int_s390_vfidb, v128db, v128db, 3, 0>; @@ -851,6 +863,12 @@ // Multiply. def VFMDB : BinaryVRRc<"vfmdb", 0xE7E7, fmul, v128db, v128db, 3, 0>; def WFMDB : BinaryVRRc<"wfmdb", 0xE7E7, fmul, v64db, v64db, 3, 8>; + let isCodeGenOnly = 1 in { + def VFMDB_FPE : + BinaryVRRc<"vfmdb", 0xE7E7, fmulwchain, v128db, v128db, 3, 0>; + def WFMDB_FPE : + BinaryVRRc<"wfmdb", 0xE7E7, fmulwchain, v64db, v64db, 3, 8>; + } // Multiply and add. def VFMADB : TernaryVRRe<"vfmadb", 0xE78F, fma, v128db, v128db, 0, 3>; @@ -879,6 +897,12 @@ // Subtract. def VFSDB : BinaryVRRc<"vfsdb", 0xE7E2, fsub, v128db, v128db, 3, 0>; def WFSDB : BinaryVRRc<"wfsdb", 0xE7E2, fsub, v64db, v64db, 3, 8>; + let isCodeGenOnly = 1 in { + def VFSDB_FPE : + BinaryVRRc<"vfsdb", 0xE7E2, fsubwchain, v128db, v128db, 3, 0>; + def WFSDB_FPE : + BinaryVRRc<"wfsdb", 0xE7E2, fsubwchain, v64db, v64db, 3, 8>; + } // Test data class immediate. 
let Defs = [CC] in { Index: lib/Target/X86/X86FloatingPoint.cpp =================================================================== --- lib/Target/X86/X86FloatingPoint.cpp +++ lib/Target/X86/X86FloatingPoint.cpp @@ -573,165 +573,209 @@ // concrete X86 instruction which uses the register stack. // static const TableEntry OpcodeTable[] = { - { X86::ABS_Fp32 , X86::ABS_F }, - { X86::ABS_Fp64 , X86::ABS_F }, - { X86::ABS_Fp80 , X86::ABS_F }, - { X86::ADD_Fp32m , X86::ADD_F32m }, - { X86::ADD_Fp64m , X86::ADD_F64m }, - { X86::ADD_Fp64m32 , X86::ADD_F32m }, - { X86::ADD_Fp80m32 , X86::ADD_F32m }, - { X86::ADD_Fp80m64 , X86::ADD_F64m }, - { X86::ADD_FpI16m32 , X86::ADD_FI16m }, - { X86::ADD_FpI16m64 , X86::ADD_FI16m }, - { X86::ADD_FpI16m80 , X86::ADD_FI16m }, - { X86::ADD_FpI32m32 , X86::ADD_FI32m }, - { X86::ADD_FpI32m64 , X86::ADD_FI32m }, - { X86::ADD_FpI32m80 , X86::ADD_FI32m }, - { X86::CHS_Fp32 , X86::CHS_F }, - { X86::CHS_Fp64 , X86::CHS_F }, - { X86::CHS_Fp80 , X86::CHS_F }, - { X86::CMOVBE_Fp32 , X86::CMOVBE_F }, - { X86::CMOVBE_Fp64 , X86::CMOVBE_F }, - { X86::CMOVBE_Fp80 , X86::CMOVBE_F }, - { X86::CMOVB_Fp32 , X86::CMOVB_F }, - { X86::CMOVB_Fp64 , X86::CMOVB_F }, - { X86::CMOVB_Fp80 , X86::CMOVB_F }, - { X86::CMOVE_Fp32 , X86::CMOVE_F }, - { X86::CMOVE_Fp64 , X86::CMOVE_F }, - { X86::CMOVE_Fp80 , X86::CMOVE_F }, - { X86::CMOVNBE_Fp32 , X86::CMOVNBE_F }, - { X86::CMOVNBE_Fp64 , X86::CMOVNBE_F }, - { X86::CMOVNBE_Fp80 , X86::CMOVNBE_F }, - { X86::CMOVNB_Fp32 , X86::CMOVNB_F }, - { X86::CMOVNB_Fp64 , X86::CMOVNB_F }, - { X86::CMOVNB_Fp80 , X86::CMOVNB_F }, - { X86::CMOVNE_Fp32 , X86::CMOVNE_F }, - { X86::CMOVNE_Fp64 , X86::CMOVNE_F }, - { X86::CMOVNE_Fp80 , X86::CMOVNE_F }, - { X86::CMOVNP_Fp32 , X86::CMOVNP_F }, - { X86::CMOVNP_Fp64 , X86::CMOVNP_F }, - { X86::CMOVNP_Fp80 , X86::CMOVNP_F }, - { X86::CMOVP_Fp32 , X86::CMOVP_F }, - { X86::CMOVP_Fp64 , X86::CMOVP_F }, - { X86::CMOVP_Fp80 , X86::CMOVP_F }, - { X86::COS_Fp32 , X86::COS_F }, - { X86::COS_Fp64 , 
X86::COS_F }, - { X86::COS_Fp80 , X86::COS_F }, - { X86::DIVR_Fp32m , X86::DIVR_F32m }, - { X86::DIVR_Fp64m , X86::DIVR_F64m }, - { X86::DIVR_Fp64m32 , X86::DIVR_F32m }, - { X86::DIVR_Fp80m32 , X86::DIVR_F32m }, - { X86::DIVR_Fp80m64 , X86::DIVR_F64m }, - { X86::DIVR_FpI16m32, X86::DIVR_FI16m}, - { X86::DIVR_FpI16m64, X86::DIVR_FI16m}, - { X86::DIVR_FpI16m80, X86::DIVR_FI16m}, - { X86::DIVR_FpI32m32, X86::DIVR_FI32m}, - { X86::DIVR_FpI32m64, X86::DIVR_FI32m}, - { X86::DIVR_FpI32m80, X86::DIVR_FI32m}, - { X86::DIV_Fp32m , X86::DIV_F32m }, - { X86::DIV_Fp64m , X86::DIV_F64m }, - { X86::DIV_Fp64m32 , X86::DIV_F32m }, - { X86::DIV_Fp80m32 , X86::DIV_F32m }, - { X86::DIV_Fp80m64 , X86::DIV_F64m }, - { X86::DIV_FpI16m32 , X86::DIV_FI16m }, - { X86::DIV_FpI16m64 , X86::DIV_FI16m }, - { X86::DIV_FpI16m80 , X86::DIV_FI16m }, - { X86::DIV_FpI32m32 , X86::DIV_FI32m }, - { X86::DIV_FpI32m64 , X86::DIV_FI32m }, - { X86::DIV_FpI32m80 , X86::DIV_FI32m }, - { X86::ILD_Fp16m32 , X86::ILD_F16m }, - { X86::ILD_Fp16m64 , X86::ILD_F16m }, - { X86::ILD_Fp16m80 , X86::ILD_F16m }, - { X86::ILD_Fp32m32 , X86::ILD_F32m }, - { X86::ILD_Fp32m64 , X86::ILD_F32m }, - { X86::ILD_Fp32m80 , X86::ILD_F32m }, - { X86::ILD_Fp64m32 , X86::ILD_F64m }, - { X86::ILD_Fp64m64 , X86::ILD_F64m }, - { X86::ILD_Fp64m80 , X86::ILD_F64m }, - { X86::ISTT_Fp16m32 , X86::ISTT_FP16m}, - { X86::ISTT_Fp16m64 , X86::ISTT_FP16m}, - { X86::ISTT_Fp16m80 , X86::ISTT_FP16m}, - { X86::ISTT_Fp32m32 , X86::ISTT_FP32m}, - { X86::ISTT_Fp32m64 , X86::ISTT_FP32m}, - { X86::ISTT_Fp32m80 , X86::ISTT_FP32m}, - { X86::ISTT_Fp64m32 , X86::ISTT_FP64m}, - { X86::ISTT_Fp64m64 , X86::ISTT_FP64m}, - { X86::ISTT_Fp64m80 , X86::ISTT_FP64m}, - { X86::IST_Fp16m32 , X86::IST_F16m }, - { X86::IST_Fp16m64 , X86::IST_F16m }, - { X86::IST_Fp16m80 , X86::IST_F16m }, - { X86::IST_Fp32m32 , X86::IST_F32m }, - { X86::IST_Fp32m64 , X86::IST_F32m }, - { X86::IST_Fp32m80 , X86::IST_F32m }, - { X86::IST_Fp64m32 , X86::IST_FP64m }, - { X86::IST_Fp64m64 , 
X86::IST_FP64m }, - { X86::IST_Fp64m80 , X86::IST_FP64m }, - { X86::LD_Fp032 , X86::LD_F0 }, - { X86::LD_Fp064 , X86::LD_F0 }, - { X86::LD_Fp080 , X86::LD_F0 }, - { X86::LD_Fp132 , X86::LD_F1 }, - { X86::LD_Fp164 , X86::LD_F1 }, - { X86::LD_Fp180 , X86::LD_F1 }, - { X86::LD_Fp32m , X86::LD_F32m }, - { X86::LD_Fp32m64 , X86::LD_F32m }, - { X86::LD_Fp32m80 , X86::LD_F32m }, - { X86::LD_Fp64m , X86::LD_F64m }, - { X86::LD_Fp64m80 , X86::LD_F64m }, - { X86::LD_Fp80m , X86::LD_F80m }, - { X86::MUL_Fp32m , X86::MUL_F32m }, - { X86::MUL_Fp64m , X86::MUL_F64m }, - { X86::MUL_Fp64m32 , X86::MUL_F32m }, - { X86::MUL_Fp80m32 , X86::MUL_F32m }, - { X86::MUL_Fp80m64 , X86::MUL_F64m }, - { X86::MUL_FpI16m32 , X86::MUL_FI16m }, - { X86::MUL_FpI16m64 , X86::MUL_FI16m }, - { X86::MUL_FpI16m80 , X86::MUL_FI16m }, - { X86::MUL_FpI32m32 , X86::MUL_FI32m }, - { X86::MUL_FpI32m64 , X86::MUL_FI32m }, - { X86::MUL_FpI32m80 , X86::MUL_FI32m }, - { X86::SIN_Fp32 , X86::SIN_F }, - { X86::SIN_Fp64 , X86::SIN_F }, - { X86::SIN_Fp80 , X86::SIN_F }, - { X86::SQRT_Fp32 , X86::SQRT_F }, - { X86::SQRT_Fp64 , X86::SQRT_F }, - { X86::SQRT_Fp80 , X86::SQRT_F }, - { X86::ST_Fp32m , X86::ST_F32m }, - { X86::ST_Fp64m , X86::ST_F64m }, - { X86::ST_Fp64m32 , X86::ST_F32m }, - { X86::ST_Fp80m32 , X86::ST_F32m }, - { X86::ST_Fp80m64 , X86::ST_F64m }, - { X86::ST_FpP80m , X86::ST_FP80m }, - { X86::SUBR_Fp32m , X86::SUBR_F32m }, - { X86::SUBR_Fp64m , X86::SUBR_F64m }, - { X86::SUBR_Fp64m32 , X86::SUBR_F32m }, - { X86::SUBR_Fp80m32 , X86::SUBR_F32m }, - { X86::SUBR_Fp80m64 , X86::SUBR_F64m }, - { X86::SUBR_FpI16m32, X86::SUBR_FI16m}, - { X86::SUBR_FpI16m64, X86::SUBR_FI16m}, - { X86::SUBR_FpI16m80, X86::SUBR_FI16m}, - { X86::SUBR_FpI32m32, X86::SUBR_FI32m}, - { X86::SUBR_FpI32m64, X86::SUBR_FI32m}, - { X86::SUBR_FpI32m80, X86::SUBR_FI32m}, - { X86::SUB_Fp32m , X86::SUB_F32m }, - { X86::SUB_Fp64m , X86::SUB_F64m }, - { X86::SUB_Fp64m32 , X86::SUB_F32m }, - { X86::SUB_Fp80m32 , X86::SUB_F32m }, - { 
X86::SUB_Fp80m64 , X86::SUB_F64m }, - { X86::SUB_FpI16m32 , X86::SUB_FI16m }, - { X86::SUB_FpI16m64 , X86::SUB_FI16m }, - { X86::SUB_FpI16m80 , X86::SUB_FI16m }, - { X86::SUB_FpI32m32 , X86::SUB_FI32m }, - { X86::SUB_FpI32m64 , X86::SUB_FI32m }, - { X86::SUB_FpI32m80 , X86::SUB_FI32m }, - { X86::TST_Fp32 , X86::TST_F }, - { X86::TST_Fp64 , X86::TST_F }, - { X86::TST_Fp80 , X86::TST_F }, - { X86::UCOM_FpIr32 , X86::UCOM_FIr }, - { X86::UCOM_FpIr64 , X86::UCOM_FIr }, - { X86::UCOM_FpIr80 , X86::UCOM_FIr }, - { X86::UCOM_Fpr32 , X86::UCOM_Fr }, - { X86::UCOM_Fpr64 , X86::UCOM_Fr }, - { X86::UCOM_Fpr80 , X86::UCOM_Fr }, + { X86::ABS_Fp32 , X86::ABS_F }, + { X86::ABS_Fp64 , X86::ABS_F }, + { X86::ABS_Fp80 , X86::ABS_F }, + { X86::ADD_FPE_Fp32m , X86::ADD_FPE_F32m }, + { X86::ADD_FPE_Fp64m , X86::ADD_FPE_F64m }, + { X86::ADD_FPE_Fp64m32 , X86::ADD_FPE_F32m }, + { X86::ADD_FPE_Fp80m32 , X86::ADD_FPE_F32m }, + { X86::ADD_FPE_Fp80m64 , X86::ADD_FPE_F64m }, + { X86::ADD_FPE_FpI16m32 , X86::ADD_FPE_FI16m }, + { X86::ADD_FPE_FpI16m64 , X86::ADD_FPE_FI16m }, + { X86::ADD_FPE_FpI16m80 , X86::ADD_FPE_FI16m }, + { X86::ADD_FPE_FpI32m32 , X86::ADD_FPE_FI32m }, + { X86::ADD_FPE_FpI32m64 , X86::ADD_FPE_FI32m }, + { X86::ADD_FPE_FpI32m80 , X86::ADD_FPE_FI32m }, + { X86::ADD_Fp32m , X86::ADD_F32m }, + { X86::ADD_Fp64m , X86::ADD_F64m }, + { X86::ADD_Fp64m32 , X86::ADD_F32m }, + { X86::ADD_Fp80m32 , X86::ADD_F32m }, + { X86::ADD_Fp80m64 , X86::ADD_F64m }, + { X86::ADD_FpI16m32 , X86::ADD_FI16m }, + { X86::ADD_FpI16m64 , X86::ADD_FI16m }, + { X86::ADD_FpI16m80 , X86::ADD_FI16m }, + { X86::ADD_FpI32m32 , X86::ADD_FI32m }, + { X86::ADD_FpI32m64 , X86::ADD_FI32m }, + { X86::ADD_FpI32m80 , X86::ADD_FI32m }, + { X86::CHS_Fp32 , X86::CHS_F }, + { X86::CHS_Fp64 , X86::CHS_F }, + { X86::CHS_Fp80 , X86::CHS_F }, + { X86::CMOVBE_Fp32 , X86::CMOVBE_F }, + { X86::CMOVBE_Fp64 , X86::CMOVBE_F }, + { X86::CMOVBE_Fp80 , X86::CMOVBE_F }, + { X86::CMOVB_Fp32 , X86::CMOVB_F }, + { X86::CMOVB_Fp64 , 
X86::CMOVB_F }, + { X86::CMOVB_Fp80 , X86::CMOVB_F }, + { X86::CMOVE_Fp32 , X86::CMOVE_F }, + { X86::CMOVE_Fp64 , X86::CMOVE_F }, + { X86::CMOVE_Fp80 , X86::CMOVE_F }, + { X86::CMOVNBE_Fp32 , X86::CMOVNBE_F }, + { X86::CMOVNBE_Fp64 , X86::CMOVNBE_F }, + { X86::CMOVNBE_Fp80 , X86::CMOVNBE_F }, + { X86::CMOVNB_Fp32 , X86::CMOVNB_F }, + { X86::CMOVNB_Fp64 , X86::CMOVNB_F }, + { X86::CMOVNB_Fp80 , X86::CMOVNB_F }, + { X86::CMOVNE_Fp32 , X86::CMOVNE_F }, + { X86::CMOVNE_Fp64 , X86::CMOVNE_F }, + { X86::CMOVNE_Fp80 , X86::CMOVNE_F }, + { X86::CMOVNP_Fp32 , X86::CMOVNP_F }, + { X86::CMOVNP_Fp64 , X86::CMOVNP_F }, + { X86::CMOVNP_Fp80 , X86::CMOVNP_F }, + { X86::CMOVP_Fp32 , X86::CMOVP_F }, + { X86::CMOVP_Fp64 , X86::CMOVP_F }, + { X86::CMOVP_Fp80 , X86::CMOVP_F }, + { X86::COS_Fp32 , X86::COS_F }, + { X86::COS_Fp64 , X86::COS_F }, + { X86::COS_Fp80 , X86::COS_F }, + { X86::DIVR_Fp32m , X86::DIVR_F32m }, + { X86::DIVR_Fp64m , X86::DIVR_F64m }, + { X86::DIVR_Fp64m32 , X86::DIVR_F32m }, + { X86::DIVR_Fp80m32 , X86::DIVR_F32m }, + { X86::DIVR_Fp80m64 , X86::DIVR_F64m }, + { X86::DIVR_FpI16m32 , X86::DIVR_FI16m }, + { X86::DIVR_FpI16m64 , X86::DIVR_FI16m }, + { X86::DIVR_FpI16m80 , X86::DIVR_FI16m }, + { X86::DIVR_FpI32m32 , X86::DIVR_FI32m }, + { X86::DIVR_FpI32m64 , X86::DIVR_FI32m }, + { X86::DIVR_FpI32m80 , X86::DIVR_FI32m }, + { X86::DIV_FPE_Fp32m , X86::DIV_FPE_F32m }, + { X86::DIV_FPE_Fp64m , X86::DIV_FPE_F64m }, + { X86::DIV_FPE_Fp64m32 , X86::DIV_FPE_F32m }, + { X86::DIV_FPE_Fp80m32 , X86::DIV_FPE_F32m }, + { X86::DIV_FPE_Fp80m64 , X86::DIV_FPE_F64m }, + { X86::DIV_FPE_FpI16m32 , X86::DIV_FPE_FI16m }, + { X86::DIV_FPE_FpI16m64 , X86::DIV_FPE_FI16m }, + { X86::DIV_FPE_FpI16m80 , X86::DIV_FPE_FI16m }, + { X86::DIV_FPE_FpI32m32 , X86::DIV_FPE_FI32m }, + { X86::DIV_FPE_FpI32m64 , X86::DIV_FPE_FI32m }, + { X86::DIV_FPE_FpI32m80 , X86::DIV_FPE_FI32m }, + { X86::DIV_Fp32m , X86::DIV_F32m }, + { X86::DIV_Fp64m , X86::DIV_F64m }, + { X86::DIV_Fp64m32 , X86::DIV_F32m }, + { 
X86::DIV_Fp80m32 , X86::DIV_F32m }, + { X86::DIV_Fp80m64 , X86::DIV_F64m }, + { X86::DIV_FpI16m32 , X86::DIV_FI16m }, + { X86::DIV_FpI16m64 , X86::DIV_FI16m }, + { X86::DIV_FpI16m80 , X86::DIV_FI16m }, + { X86::DIV_FpI32m32 , X86::DIV_FI32m }, + { X86::DIV_FpI32m64 , X86::DIV_FI32m }, + { X86::DIV_FpI32m80 , X86::DIV_FI32m }, + { X86::ILD_Fp16m32 , X86::ILD_F16m }, + { X86::ILD_Fp16m64 , X86::ILD_F16m }, + { X86::ILD_Fp16m80 , X86::ILD_F16m }, + { X86::ILD_Fp32m32 , X86::ILD_F32m }, + { X86::ILD_Fp32m64 , X86::ILD_F32m }, + { X86::ILD_Fp32m80 , X86::ILD_F32m }, + { X86::ILD_Fp64m32 , X86::ILD_F64m }, + { X86::ILD_Fp64m64 , X86::ILD_F64m }, + { X86::ILD_Fp64m80 , X86::ILD_F64m }, + { X86::ISTT_Fp16m32 , X86::ISTT_FP16m }, + { X86::ISTT_Fp16m64 , X86::ISTT_FP16m }, + { X86::ISTT_Fp16m80 , X86::ISTT_FP16m }, + { X86::ISTT_Fp32m32 , X86::ISTT_FP32m }, + { X86::ISTT_Fp32m64 , X86::ISTT_FP32m }, + { X86::ISTT_Fp32m80 , X86::ISTT_FP32m }, + { X86::ISTT_Fp64m32 , X86::ISTT_FP64m }, + { X86::ISTT_Fp64m64 , X86::ISTT_FP64m }, + { X86::ISTT_Fp64m80 , X86::ISTT_FP64m }, + { X86::IST_Fp16m32 , X86::IST_F16m }, + { X86::IST_Fp16m64 , X86::IST_F16m }, + { X86::IST_Fp16m80 , X86::IST_F16m }, + { X86::IST_Fp32m32 , X86::IST_F32m }, + { X86::IST_Fp32m64 , X86::IST_F32m }, + { X86::IST_Fp32m80 , X86::IST_F32m }, + { X86::IST_Fp64m32 , X86::IST_FP64m }, + { X86::IST_Fp64m64 , X86::IST_FP64m }, + { X86::IST_Fp64m80 , X86::IST_FP64m }, + { X86::LD_Fp032 , X86::LD_F0 }, + { X86::LD_Fp064 , X86::LD_F0 }, + { X86::LD_Fp080 , X86::LD_F0 }, + { X86::LD_Fp132 , X86::LD_F1 }, + { X86::LD_Fp164 , X86::LD_F1 }, + { X86::LD_Fp180 , X86::LD_F1 }, + { X86::LD_Fp32m , X86::LD_F32m }, + { X86::LD_Fp32m64 , X86::LD_F32m }, + { X86::LD_Fp32m80 , X86::LD_F32m }, + { X86::LD_Fp64m , X86::LD_F64m }, + { X86::LD_Fp64m80 , X86::LD_F64m }, + { X86::LD_Fp80m , X86::LD_F80m }, + { X86::MUL_FPE_Fp32m , X86::MUL_FPE_F32m }, + { X86::MUL_FPE_Fp64m , X86::MUL_FPE_F64m }, + { X86::MUL_FPE_Fp64m32 , 
X86::MUL_FPE_F32m }, + { X86::MUL_FPE_Fp80m32 , X86::MUL_FPE_F32m }, + { X86::MUL_FPE_Fp80m64 , X86::MUL_FPE_F64m }, + { X86::MUL_FPE_FpI16m32 , X86::MUL_FPE_FI16m }, + { X86::MUL_FPE_FpI16m64 , X86::MUL_FPE_FI16m }, + { X86::MUL_FPE_FpI16m80 , X86::MUL_FPE_FI16m }, + { X86::MUL_FPE_FpI32m32 , X86::MUL_FPE_FI32m }, + { X86::MUL_FPE_FpI32m64 , X86::MUL_FPE_FI32m }, + { X86::MUL_FPE_FpI32m80 , X86::MUL_FPE_FI32m }, + { X86::MUL_Fp32m , X86::MUL_F32m }, + { X86::MUL_Fp64m , X86::MUL_F64m }, + { X86::MUL_Fp64m32 , X86::MUL_F32m }, + { X86::MUL_Fp80m32 , X86::MUL_F32m }, + { X86::MUL_Fp80m64 , X86::MUL_F64m }, + { X86::MUL_FpI16m32 , X86::MUL_FI16m }, + { X86::MUL_FpI16m64 , X86::MUL_FI16m }, + { X86::MUL_FpI16m80 , X86::MUL_FI16m }, + { X86::MUL_FpI32m32 , X86::MUL_FI32m }, + { X86::MUL_FpI32m64 , X86::MUL_FI32m }, + { X86::MUL_FpI32m80 , X86::MUL_FI32m }, + { X86::SIN_Fp32 , X86::SIN_F }, + { X86::SIN_Fp64 , X86::SIN_F }, + { X86::SIN_Fp80 , X86::SIN_F }, + { X86::SQRT_Fp32 , X86::SQRT_F }, + { X86::SQRT_Fp64 , X86::SQRT_F }, + { X86::SQRT_Fp80 , X86::SQRT_F }, + { X86::ST_Fp32m , X86::ST_F32m }, + { X86::ST_Fp64m , X86::ST_F64m }, + { X86::ST_Fp64m32 , X86::ST_F32m }, + { X86::ST_Fp80m32 , X86::ST_F32m }, + { X86::ST_Fp80m64 , X86::ST_F64m }, + { X86::ST_FpP80m , X86::ST_FP80m }, + { X86::SUBR_Fp32m , X86::SUBR_F32m }, + { X86::SUBR_Fp64m , X86::SUBR_F64m }, + { X86::SUBR_Fp64m32 , X86::SUBR_F32m }, + { X86::SUBR_Fp80m32 , X86::SUBR_F32m }, + { X86::SUBR_Fp80m64 , X86::SUBR_F64m }, + { X86::SUBR_FpI16m32 , X86::SUBR_FI16m }, + { X86::SUBR_FpI16m64 , X86::SUBR_FI16m }, + { X86::SUBR_FpI16m80 , X86::SUBR_FI16m }, + { X86::SUBR_FpI32m32 , X86::SUBR_FI32m }, + { X86::SUBR_FpI32m64 , X86::SUBR_FI32m }, + { X86::SUBR_FpI32m80 , X86::SUBR_FI32m }, + { X86::SUB_FPE_Fp32m , X86::SUB_FPE_F32m }, + { X86::SUB_FPE_Fp64m , X86::SUB_FPE_F64m }, + { X86::SUB_FPE_Fp64m32 , X86::SUB_FPE_F32m }, + { X86::SUB_FPE_Fp80m32 , X86::SUB_FPE_F32m }, + { X86::SUB_FPE_Fp80m64 , 
X86::SUB_FPE_F64m }, + { X86::SUB_FPE_FpI16m32 , X86::SUB_FPE_FI16m }, + { X86::SUB_FPE_FpI16m64 , X86::SUB_FPE_FI16m }, + { X86::SUB_FPE_FpI16m80 , X86::SUB_FPE_FI16m }, + { X86::SUB_FPE_FpI32m32 , X86::SUB_FPE_FI32m }, + { X86::SUB_FPE_FpI32m64 , X86::SUB_FPE_FI32m }, + { X86::SUB_FPE_FpI32m80 , X86::SUB_FPE_FI32m }, + { X86::SUB_Fp32m , X86::SUB_F32m }, + { X86::SUB_Fp64m , X86::SUB_F64m }, + { X86::SUB_Fp64m32 , X86::SUB_F32m }, + { X86::SUB_Fp80m32 , X86::SUB_F32m }, + { X86::SUB_Fp80m64 , X86::SUB_F64m }, + { X86::SUB_FpI16m32 , X86::SUB_FI16m }, + { X86::SUB_FpI16m64 , X86::SUB_FI16m }, + { X86::SUB_FpI16m80 , X86::SUB_FI16m }, + { X86::SUB_FpI32m32 , X86::SUB_FI32m }, + { X86::SUB_FpI32m64 , X86::SUB_FI32m }, + { X86::SUB_FpI32m80 , X86::SUB_FI32m }, + { X86::TST_Fp32 , X86::TST_F }, + { X86::TST_Fp64 , X86::TST_F }, + { X86::TST_Fp80 , X86::TST_F }, + { X86::UCOM_FpIr32 , X86::UCOM_FIr }, + { X86::UCOM_FpIr64 , X86::UCOM_FIr }, + { X86::UCOM_FpIr80 , X86::UCOM_FIr }, + { X86::UCOM_Fpr32 , X86::UCOM_Fr }, + { X86::UCOM_Fpr64 , X86::UCOM_Fr }, + { X86::UCOM_Fpr80 , X86::UCOM_Fr }, }; static unsigned getConcreteOpcode(unsigned Opcode) { @@ -1058,66 +1102,114 @@ // ForwardST0Table - Map: A = B op C into: ST(0) = ST(0) op ST(i) static const TableEntry ForwardST0Table[] = { - { X86::ADD_Fp32 , X86::ADD_FST0r }, - { X86::ADD_Fp64 , X86::ADD_FST0r }, - { X86::ADD_Fp80 , X86::ADD_FST0r }, - { X86::DIV_Fp32 , X86::DIV_FST0r }, - { X86::DIV_Fp64 , X86::DIV_FST0r }, - { X86::DIV_Fp80 , X86::DIV_FST0r }, - { X86::MUL_Fp32 , X86::MUL_FST0r }, - { X86::MUL_Fp64 , X86::MUL_FST0r }, - { X86::MUL_Fp80 , X86::MUL_FST0r }, - { X86::SUB_Fp32 , X86::SUB_FST0r }, - { X86::SUB_Fp64 , X86::SUB_FST0r }, - { X86::SUB_Fp80 , X86::SUB_FST0r }, + { X86::ADD_FPE_Fp32 , X86::ADD_FST0r }, + { X86::ADD_FPE_Fp64 , X86::ADD_FST0r }, + { X86::ADD_FPE_Fp80 , X86::ADD_FST0r }, + { X86::ADD_Fp32 , X86::ADD_FST0r }, + { X86::ADD_Fp64 , X86::ADD_FST0r }, + { X86::ADD_Fp80 , X86::ADD_FST0r }, + { 
X86::DIV_FPE_Fp32 , X86::DIV_FST0r }, + { X86::DIV_FPE_Fp64 , X86::DIV_FST0r }, + { X86::DIV_FPE_Fp80 , X86::DIV_FST0r }, + { X86::DIV_Fp32 , X86::DIV_FST0r }, + { X86::DIV_Fp64 , X86::DIV_FST0r }, + { X86::DIV_Fp80 , X86::DIV_FST0r }, + { X86::MUL_FPE_Fp32 , X86::MUL_FST0r }, + { X86::MUL_FPE_Fp64 , X86::MUL_FST0r }, + { X86::MUL_FPE_Fp80 , X86::MUL_FST0r }, + { X86::MUL_Fp32 , X86::MUL_FST0r }, + { X86::MUL_Fp64 , X86::MUL_FST0r }, + { X86::MUL_Fp80 , X86::MUL_FST0r }, + { X86::SUB_FPE_Fp32 , X86::SUB_FST0r }, + { X86::SUB_FPE_Fp64 , X86::SUB_FST0r }, + { X86::SUB_FPE_Fp80 , X86::SUB_FST0r }, + { X86::SUB_Fp32 , X86::SUB_FST0r }, + { X86::SUB_Fp64 , X86::SUB_FST0r }, + { X86::SUB_Fp80 , X86::SUB_FST0r }, }; // ReverseST0Table - Map: A = B op C into: ST(0) = ST(i) op ST(0) static const TableEntry ReverseST0Table[] = { - { X86::ADD_Fp32 , X86::ADD_FST0r }, // commutative - { X86::ADD_Fp64 , X86::ADD_FST0r }, // commutative - { X86::ADD_Fp80 , X86::ADD_FST0r }, // commutative - { X86::DIV_Fp32 , X86::DIVR_FST0r }, - { X86::DIV_Fp64 , X86::DIVR_FST0r }, - { X86::DIV_Fp80 , X86::DIVR_FST0r }, - { X86::MUL_Fp32 , X86::MUL_FST0r }, // commutative - { X86::MUL_Fp64 , X86::MUL_FST0r }, // commutative - { X86::MUL_Fp80 , X86::MUL_FST0r }, // commutative - { X86::SUB_Fp32 , X86::SUBR_FST0r }, - { X86::SUB_Fp64 , X86::SUBR_FST0r }, - { X86::SUB_Fp80 , X86::SUBR_FST0r }, + { X86::ADD_FPE_Fp32 , X86::ADD_FST0r }, // commutative + { X86::ADD_FPE_Fp64 , X86::ADD_FST0r }, // commutative + { X86::ADD_FPE_Fp80 , X86::ADD_FST0r }, // commutative + { X86::ADD_Fp32 , X86::ADD_FST0r }, // commutative + { X86::ADD_Fp64 , X86::ADD_FST0r }, // commutative + { X86::ADD_Fp80 , X86::ADD_FST0r }, // commutative + { X86::DIV_FPE_Fp32 , X86::DIVR_FST0r }, + { X86::DIV_FPE_Fp64 , X86::DIVR_FST0r }, + { X86::DIV_FPE_Fp80 , X86::DIVR_FST0r }, + { X86::DIV_Fp32 , X86::DIVR_FST0r }, + { X86::DIV_Fp64 , X86::DIVR_FST0r }, + { X86::DIV_Fp80 , X86::DIVR_FST0r }, + { X86::MUL_FPE_Fp32 , X86::MUL_FST0r 
}, // commutative + { X86::MUL_FPE_Fp64 , X86::MUL_FST0r }, // commutative + { X86::MUL_FPE_Fp80 , X86::MUL_FST0r }, // commutative + { X86::MUL_Fp32 , X86::MUL_FST0r }, // commutative + { X86::MUL_Fp64 , X86::MUL_FST0r }, // commutative + { X86::MUL_Fp80 , X86::MUL_FST0r }, // commutative + { X86::SUB_FPE_Fp32 , X86::SUBR_FST0r }, + { X86::SUB_FPE_Fp64 , X86::SUBR_FST0r }, + { X86::SUB_FPE_Fp80 , X86::SUBR_FST0r }, + { X86::SUB_Fp32 , X86::SUBR_FST0r }, + { X86::SUB_Fp64 , X86::SUBR_FST0r }, + { X86::SUB_Fp80 , X86::SUBR_FST0r }, }; // ForwardSTiTable - Map: A = B op C into: ST(i) = ST(0) op ST(i) static const TableEntry ForwardSTiTable[] = { - { X86::ADD_Fp32 , X86::ADD_FrST0 }, // commutative - { X86::ADD_Fp64 , X86::ADD_FrST0 }, // commutative - { X86::ADD_Fp80 , X86::ADD_FrST0 }, // commutative - { X86::DIV_Fp32 , X86::DIVR_FrST0 }, - { X86::DIV_Fp64 , X86::DIVR_FrST0 }, - { X86::DIV_Fp80 , X86::DIVR_FrST0 }, - { X86::MUL_Fp32 , X86::MUL_FrST0 }, // commutative - { X86::MUL_Fp64 , X86::MUL_FrST0 }, // commutative - { X86::MUL_Fp80 , X86::MUL_FrST0 }, // commutative - { X86::SUB_Fp32 , X86::SUBR_FrST0 }, - { X86::SUB_Fp64 , X86::SUBR_FrST0 }, - { X86::SUB_Fp80 , X86::SUBR_FrST0 }, + { X86::ADD_FPE_Fp32 , X86::ADD_FrST0 }, // commutative + { X86::ADD_FPE_Fp64 , X86::ADD_FrST0 }, // commutative + { X86::ADD_FPE_Fp80 , X86::ADD_FrST0 }, // commutative + { X86::ADD_Fp32 , X86::ADD_FrST0 }, // commutative + { X86::ADD_Fp64 , X86::ADD_FrST0 }, // commutative + { X86::ADD_Fp80 , X86::ADD_FrST0 }, // commutative + { X86::DIV_FPE_Fp32 , X86::DIVR_FrST0 }, + { X86::DIV_FPE_Fp64 , X86::DIVR_FrST0 }, + { X86::DIV_FPE_Fp80 , X86::DIVR_FrST0 }, + { X86::DIV_Fp32 , X86::DIVR_FrST0 }, + { X86::DIV_Fp64 , X86::DIVR_FrST0 }, + { X86::DIV_Fp80 , X86::DIVR_FrST0 }, + { X86::MUL_FPE_Fp32 , X86::MUL_FrST0 }, // commutative + { X86::MUL_FPE_Fp64 , X86::MUL_FrST0 }, // commutative + { X86::MUL_FPE_Fp80 , X86::MUL_FrST0 }, // commutative + { X86::MUL_Fp32 , X86::MUL_FrST0 }, // 
commutative + { X86::MUL_Fp64 , X86::MUL_FrST0 }, // commutative + { X86::MUL_Fp80 , X86::MUL_FrST0 }, // commutative + { X86::SUB_FPE_Fp32 , X86::SUBR_FrST0 }, + { X86::SUB_FPE_Fp64 , X86::SUBR_FrST0 }, + { X86::SUB_FPE_Fp80 , X86::SUBR_FrST0 }, + { X86::SUB_Fp32 , X86::SUBR_FrST0 }, + { X86::SUB_Fp64 , X86::SUBR_FrST0 }, + { X86::SUB_Fp80 , X86::SUBR_FrST0 }, }; // ReverseSTiTable - Map: A = B op C into: ST(i) = ST(i) op ST(0) static const TableEntry ReverseSTiTable[] = { - { X86::ADD_Fp32 , X86::ADD_FrST0 }, - { X86::ADD_Fp64 , X86::ADD_FrST0 }, - { X86::ADD_Fp80 , X86::ADD_FrST0 }, - { X86::DIV_Fp32 , X86::DIV_FrST0 }, - { X86::DIV_Fp64 , X86::DIV_FrST0 }, - { X86::DIV_Fp80 , X86::DIV_FrST0 }, - { X86::MUL_Fp32 , X86::MUL_FrST0 }, - { X86::MUL_Fp64 , X86::MUL_FrST0 }, - { X86::MUL_Fp80 , X86::MUL_FrST0 }, - { X86::SUB_Fp32 , X86::SUB_FrST0 }, - { X86::SUB_Fp64 , X86::SUB_FrST0 }, - { X86::SUB_Fp80 , X86::SUB_FrST0 }, + { X86::ADD_FPE_Fp32 , X86::ADD_FrST0 }, + { X86::ADD_FPE_Fp64 , X86::ADD_FrST0 }, + { X86::ADD_FPE_Fp80 , X86::ADD_FrST0 }, + { X86::ADD_Fp32 , X86::ADD_FrST0 }, + { X86::ADD_Fp64 , X86::ADD_FrST0 }, + { X86::ADD_Fp80 , X86::ADD_FrST0 }, + { X86::DIV_FPE_Fp32 , X86::DIV_FrST0 }, + { X86::DIV_FPE_Fp64 , X86::DIV_FrST0 }, + { X86::DIV_FPE_Fp80 , X86::DIV_FrST0 }, + { X86::DIV_Fp32 , X86::DIV_FrST0 }, + { X86::DIV_Fp64 , X86::DIV_FrST0 }, + { X86::DIV_Fp80 , X86::DIV_FrST0 }, + { X86::MUL_FPE_Fp32 , X86::MUL_FrST0 }, + { X86::MUL_FPE_Fp64 , X86::MUL_FrST0 }, + { X86::MUL_FPE_Fp80 , X86::MUL_FrST0 }, + { X86::MUL_Fp32 , X86::MUL_FrST0 }, + { X86::MUL_Fp64 , X86::MUL_FrST0 }, + { X86::MUL_Fp80 , X86::MUL_FrST0 }, + { X86::SUB_FPE_Fp32 , X86::SUB_FrST0 }, + { X86::SUB_FPE_Fp64 , X86::SUB_FrST0 }, + { X86::SUB_FPE_Fp80 , X86::SUB_FrST0 }, + { X86::SUB_Fp32 , X86::SUB_FrST0 }, + { X86::SUB_Fp64 , X86::SUB_FrST0 }, + { X86::SUB_Fp80 , X86::SUB_FrST0 }, }; Index: lib/Target/X86/X86ISelLowering.h 
=================================================================== --- lib/Target/X86/X86ISelLowering.h +++ lib/Target/X86/X86ISelLowering.h @@ -207,6 +207,10 @@ FSUB_RND, FMUL_RND, FDIV_RND, + FADDWCHAIN_RND, + FSUBWCHAIN_RND, + FMULWCHAIN_RND, + FDIVWCHAIN_RND, FMAX_RND, FMIN_RND, FSQRT_RND, Index: lib/Target/X86/X86ISelLowering.cpp =================================================================== --- lib/Target/X86/X86ISelLowering.cpp +++ lib/Target/X86/X86ISelLowering.cpp @@ -320,12 +320,16 @@ // This is what the CRT headers do - `fmodf` is an inline header // function casting to f64 and calling `fmod`. setOperationAction(ISD::FREM , MVT::f32 , Promote); + setOperationAction(ISD::FREM_W_CHAIN , MVT::f32 , Promote); } else { setOperationAction(ISD::FREM , MVT::f32 , Expand); + setOperationAction(ISD::FREM_W_CHAIN , MVT::f32 , Expand); } setOperationAction(ISD::FREM , MVT::f64 , Expand); setOperationAction(ISD::FREM , MVT::f80 , Expand); + setOperationAction(ISD::FREM_W_CHAIN , MVT::f64 , Expand); + setOperationAction(ISD::FREM_W_CHAIN , MVT::f80 , Expand); setOperationAction(ISD::FLT_ROUNDS_ , MVT::i32 , Custom); // Promote the i8 variants and force them on up to i32 which has a shorter @@ -673,13 +677,17 @@ setOperationAction(ISD::ADD , VT, Expand); setOperationAction(ISD::SUB , VT, Expand); setOperationAction(ISD::FADD, VT, Expand); + setOperationAction(ISD::FADD_W_CHAIN, VT, Expand); setOperationAction(ISD::FNEG, VT, Expand); setOperationAction(ISD::FSUB, VT, Expand); + setOperationAction(ISD::FSUB_W_CHAIN, VT, Expand); setOperationAction(ISD::MUL , VT, Expand); setOperationAction(ISD::FMUL, VT, Expand); + setOperationAction(ISD::FMUL_W_CHAIN, VT, Expand); setOperationAction(ISD::SDIV, VT, Expand); setOperationAction(ISD::UDIV, VT, Expand); setOperationAction(ISD::FDIV, VT, Expand); + setOperationAction(ISD::FDIV_W_CHAIN, VT, Expand); setOperationAction(ISD::SREM, VT, Expand); setOperationAction(ISD::UREM, VT, Expand); setOperationAction(ISD::LOAD, VT, 
Expand); @@ -694,6 +702,7 @@ setOperationAction(ISD::FCOS, VT, Expand); setOperationAction(ISD::FSINCOS, VT, Expand); setOperationAction(ISD::FREM, VT, Expand); + setOperationAction(ISD::FREM_W_CHAIN, VT, Expand); setOperationAction(ISD::FMA, VT, Expand); setOperationAction(ISD::FPOWI, VT, Expand); setOperationAction(ISD::FSQRT, VT, Expand); @@ -782,9 +791,13 @@ addRegisterClass(MVT::v4f32, &X86::VR128RegClass); setOperationAction(ISD::FADD, MVT::v4f32, Legal); + setOperationAction(ISD::FADD_W_CHAIN, MVT::v4f32, Legal); setOperationAction(ISD::FSUB, MVT::v4f32, Legal); + setOperationAction(ISD::FSUB_W_CHAIN, MVT::v4f32, Legal); setOperationAction(ISD::FMUL, MVT::v4f32, Legal); + setOperationAction(ISD::FMUL_W_CHAIN, MVT::v4f32, Legal); setOperationAction(ISD::FDIV, MVT::v4f32, Legal); + setOperationAction(ISD::FDIV_W_CHAIN, MVT::v4f32, Legal); setOperationAction(ISD::FSQRT, MVT::v4f32, Legal); setOperationAction(ISD::FNEG, MVT::v4f32, Custom); setOperationAction(ISD::FABS, MVT::v4f32, Custom); @@ -824,9 +837,13 @@ setOperationAction(ISD::SUB, MVT::v2i64, Legal); setOperationAction(ISD::MUL, MVT::v8i16, Legal); setOperationAction(ISD::FADD, MVT::v2f64, Legal); + setOperationAction(ISD::FADD_W_CHAIN, MVT::v2f64, Legal); setOperationAction(ISD::FSUB, MVT::v2f64, Legal); + setOperationAction(ISD::FSUB_W_CHAIN, MVT::v2f64, Legal); setOperationAction(ISD::FMUL, MVT::v2f64, Legal); + setOperationAction(ISD::FMUL_W_CHAIN, MVT::v2f64, Legal); setOperationAction(ISD::FDIV, MVT::v2f64, Legal); + setOperationAction(ISD::FDIV_W_CHAIN, MVT::v2f64, Legal); setOperationAction(ISD::FSQRT, MVT::v2f64, Legal); setOperationAction(ISD::FNEG, MVT::v2f64, Custom); setOperationAction(ISD::FABS, MVT::v2f64, Custom); @@ -1062,9 +1079,13 @@ setOperationAction(ISD::LOAD, MVT::v4i64, Legal); setOperationAction(ISD::FADD, MVT::v8f32, Legal); + setOperationAction(ISD::FADD_W_CHAIN, MVT::v8f32, Legal); setOperationAction(ISD::FSUB, MVT::v8f32, Legal); + setOperationAction(ISD::FSUB_W_CHAIN, 
MVT::v8f32, Legal); setOperationAction(ISD::FMUL, MVT::v8f32, Legal); + setOperationAction(ISD::FMUL_W_CHAIN, MVT::v8f32, Legal); setOperationAction(ISD::FDIV, MVT::v8f32, Legal); + setOperationAction(ISD::FDIV_W_CHAIN, MVT::v8f32, Legal); setOperationAction(ISD::FSQRT, MVT::v8f32, Legal); setOperationAction(ISD::FFLOOR, MVT::v8f32, Legal); setOperationAction(ISD::FCEIL, MVT::v8f32, Legal); @@ -1075,9 +1096,13 @@ setOperationAction(ISD::FABS, MVT::v8f32, Custom); setOperationAction(ISD::FADD, MVT::v4f64, Legal); + setOperationAction(ISD::FADD_W_CHAIN, MVT::v4f64, Legal); setOperationAction(ISD::FSUB, MVT::v4f64, Legal); + setOperationAction(ISD::FSUB_W_CHAIN, MVT::v4f64, Legal); setOperationAction(ISD::FMUL, MVT::v4f64, Legal); + setOperationAction(ISD::FMUL_W_CHAIN, MVT::v4f64, Legal); setOperationAction(ISD::FDIV, MVT::v4f64, Legal); + setOperationAction(ISD::FDIV_W_CHAIN, MVT::v4f64, Legal); setOperationAction(ISD::FSQRT, MVT::v4f64, Legal); setOperationAction(ISD::FFLOOR, MVT::v4f64, Legal); setOperationAction(ISD::FCEIL, MVT::v4f64, Legal); @@ -1335,16 +1360,24 @@ setOperationAction(ISD::LOAD, MVT::v16i1, Legal); setOperationAction(ISD::FADD, MVT::v16f32, Legal); + setOperationAction(ISD::FADD_W_CHAIN, MVT::v16f32, Legal); setOperationAction(ISD::FSUB, MVT::v16f32, Legal); + setOperationAction(ISD::FSUB_W_CHAIN, MVT::v16f32, Legal); setOperationAction(ISD::FMUL, MVT::v16f32, Legal); + setOperationAction(ISD::FMUL_W_CHAIN, MVT::v16f32, Legal); setOperationAction(ISD::FDIV, MVT::v16f32, Legal); + setOperationAction(ISD::FDIV_W_CHAIN, MVT::v16f32, Legal); setOperationAction(ISD::FSQRT, MVT::v16f32, Legal); setOperationAction(ISD::FNEG, MVT::v16f32, Custom); setOperationAction(ISD::FADD, MVT::v8f64, Legal); + setOperationAction(ISD::FADD_W_CHAIN, MVT::v8f64, Legal); setOperationAction(ISD::FSUB, MVT::v8f64, Legal); + setOperationAction(ISD::FSUB_W_CHAIN, MVT::v8f64, Legal); setOperationAction(ISD::FMUL, MVT::v8f64, Legal); + setOperationAction(ISD::FMUL_W_CHAIN, 
MVT::v8f64, Legal); setOperationAction(ISD::FDIV, MVT::v8f64, Legal); + setOperationAction(ISD::FDIV_W_CHAIN, MVT::v8f64, Legal); setOperationAction(ISD::FSQRT, MVT::v8f64, Legal); setOperationAction(ISD::FNEG, MVT::v8f64, Custom); setOperationAction(ISD::FMA, MVT::v8f64, Legal); @@ -20454,9 +20487,13 @@ case X86ISD::EXP2: return "X86ISD::EXP2"; case X86ISD::RSQRT28: return "X86ISD::RSQRT28"; case X86ISD::FADD_RND: return "X86ISD::FADD_RND"; + case X86ISD::FADDWCHAIN_RND: return "X86ISD::FADDWCHAIN_RND"; case X86ISD::FSUB_RND: return "X86ISD::FSUB_RND"; + case X86ISD::FSUBWCHAIN_RND: return "X86ISD::FSUBWCHAIN_RND"; case X86ISD::FMUL_RND: return "X86ISD::FMUL_RND"; + case X86ISD::FMULWCHAIN_RND: return "X86ISD::FMULWCHAIN_RND"; case X86ISD::FDIV_RND: return "X86ISD::FDIV_RND"; + case X86ISD::FDIVWCHAIN_RND: return "X86ISD::FDIVWCHAIN_RND"; case X86ISD::FSQRT_RND: return "X86ISD::FSQRT_RND"; case X86ISD::FGETEXP_RND: return "X86ISD::FGETEXP_RND"; case X86ISD::SCALEF: return "X86ISD::SCALEF"; Index: lib/Target/X86/X86InstrAVX512.td =================================================================== --- lib/Target/X86/X86InstrAVX512.td +++ lib/Target/X86/X86InstrAVX512.td @@ -3703,6 +3703,16 @@ XD, VEX_W, EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>; } defm VADD : avx512_binop_s_round<0x58, "vadd", fadd, X86faddRnd, SSE_ALU_ITINS_S, 1>; +let isCodeGenOnly = 1 in { + defm VADD_FPE : avx512_binop_s_round<0x58, "vadd", faddwchain, + X86faddRndWChain, SSE_ALU_ITINS_S, 1>; + defm VSUB_FPE : avx512_binop_s_round<0x5C, "vsub", fsubwchain, + X86fsubRndWChain, SSE_ALU_ITINS_S, 0>; + defm VMUL_FPE : avx512_binop_s_round<0x59, "vmul", fmulwchain, + X86fmulRndWChain, SSE_ALU_ITINS_S, 1>; + defm VDIV_FPE : avx512_binop_s_round<0x5E, "vdiv", fdivwchain, + X86fdivRndWChain, SSE_ALU_ITINS_S, 0>; +} defm VMUL : avx512_binop_s_round<0x59, "vmul", fmul, X86fmulRnd, SSE_ALU_ITINS_S, 1>; defm VSUB : avx512_binop_s_round<0x5C, "vsub", fsub, X86fsubRnd, SSE_ALU_ITINS_S, 0>; defm VDIV : 
avx512_binop_s_round<0x5E, "vdiv", fdiv, X86fdivRnd, SSE_ALU_ITINS_S, 0>; @@ -3791,6 +3801,12 @@ defm VADD : avx512_fp_binop_p<0x58, "vadd", fadd, 1>, avx512_fp_binop_p_round<0x58, "vadd", X86faddRnd>; +let isCodeGenOnly = 1 in { + defm VADD_FPE : avx512_fp_binop_p<0x58, "vadd", faddwchain, 1>; + defm VSUB_FPE : avx512_fp_binop_p<0x5C, "vsub", fsubwchain, 1>; + defm VMUL_FPE : avx512_fp_binop_p<0x59, "vmul", fmulwchain, 1>; + defm VDIV_FPE : avx512_fp_binop_p<0x5E, "vdiv", fdivwchain, 1>; +} defm VMUL : avx512_fp_binop_p<0x59, "vmul", fmul, 1>, avx512_fp_binop_p_round<0x59, "vmul", X86fmulRnd>; defm VSUB : avx512_fp_binop_p<0x5C, "vsub", fsub>, Index: lib/Target/X86/X86InstrFPStack.td =================================================================== --- lib/Target/X86/X86InstrFPStack.td +++ lib/Target/X86/X86InstrFPStack.td @@ -239,18 +239,34 @@ defm SUB : FPBinary_rr; defm MUL : FPBinary_rr; defm DIV : FPBinary_rr; +let isCodeGenOnly = 1 in { + defm ADD_FPE : FPBinary_rr; + defm SUB_FPE : FPBinary_rr; + defm MUL_FPE : FPBinary_rr; + defm DIV_FPE : FPBinary_rr; +} // Sets the scheduling resources for the actual NAME#_Fm defintions. 
let SchedRW = [WriteFAddLd] in { defm ADD : FPBinary; +let isCodeGenOnly = 1 in + defm ADD_FPE : FPBinary; defm SUB : FPBinary; +let isCodeGenOnly = 1 in + defm SUB_FPE : FPBinary; defm SUBR: FPBinary; } let SchedRW = [WriteFMulLd] in { defm MUL : FPBinary; +let isCodeGenOnly = 1 in + defm MUL_FPE : FPBinary; } let SchedRW = [WriteFDivLd] in { defm DIV : FPBinary; defm DIVR: FPBinary; +let isCodeGenOnly = 1 in { + defm DIV_FPE : FPBinary; + defm DIVR_FPE : FPBinary; +} } } Index: lib/Target/X86/X86InstrFragmentsSIMD.td =================================================================== --- lib/Target/X86/X86InstrFragmentsSIMD.td +++ lib/Target/X86/X86InstrFragmentsSIMD.td @@ -441,6 +441,10 @@ def X86fsubRnd : SDNode<"X86ISD::FSUB_RND", SDTFPBinOpRound>; def X86fmulRnd : SDNode<"X86ISD::FMUL_RND", SDTFPBinOpRound>; def X86fdivRnd : SDNode<"X86ISD::FDIV_RND", SDTFPBinOpRound>; +def X86faddRndWChain : SDNode<"X86ISD::FADDWCHAIN_RND", SDTFPBinOpRound>; +def X86fsubRndWChain : SDNode<"X86ISD::FSUBWCHAIN_RND", SDTFPBinOpRound>; +def X86fmulRndWChain : SDNode<"X86ISD::FMULWCHAIN_RND", SDTFPBinOpRound>; +def X86fdivRndWChain : SDNode<"X86ISD::FDIVWCHAIN_RND", SDTFPBinOpRound>; def X86fmaxRnd : SDNode<"X86ISD::FMAX_RND", SDTFPBinOpRound>; def X86scalef : SDNode<"X86ISD::SCALEF", SDTFPBinOpRound>; def X86fminRnd : SDNode<"X86ISD::FMIN_RND", SDTFPBinOpRound>; Index: lib/Target/X86/X86InstrSSE.td =================================================================== --- lib/Target/X86/X86InstrSSE.td +++ lib/Target/X86/X86InstrSSE.td @@ -262,8 +262,8 @@ multiclass sse12_fp_scalar_int opc, string OpcodeStr, RegisterClass RC, string asm, string SSEVer, string FPSizeStr, Operand memopr, ComplexPattern mem_cpat, - Domain d, OpndItins itins, bit Is2Addr = 1> { -let isCodeGenOnly = 1 in { + Domain d, OpndItins itins, bit Is2Addr, bit hse> { +let isCodeGenOnly = 1, hasSideEffects = hse in { def rr_Int : SI_Int opc, string OpcodeStr, - SizeItins itins> { + SizeItins itins, bit hse = 0> { 
defm V#NAME#SS : sse12_fp_scalar_int, XS, VEX_4V, VEX_LIG; + SSEPackedSingle, itins.s, 0, hse>, XS, VEX_4V, VEX_LIG; defm V#NAME#SD : sse12_fp_scalar_int, XD, VEX_4V, VEX_LIG; + SSEPackedDouble, itins.d, 0, hse>, XD, VEX_4V, VEX_LIG; let Constraints = "$src1 = $dst" in { defm SS : sse12_fp_scalar_int, XS; + SSEPackedSingle, itins.s, 1, hse>, XS; defm SD : sse12_fp_scalar_int, XD; + SSEPackedDouble, itins.d, 1, hse>, XD; } } @@ -3107,16 +3107,36 @@ defm ADD : basic_sse12_fp_binop_p<0x58, "add", fadd, SSE_ALU_ITINS_P>, basic_sse12_fp_binop_s<0x58, "add", fadd, SSE_ALU_ITINS_S>, basic_sse12_fp_binop_s_int<0x58, "add", SSE_ALU_ITINS_S>; +let isCodeGenOnly = 1 in +defm ADD_FPE : + basic_sse12_fp_binop_p<0x58, "add", faddwchain, SSE_ALU_ITINS_P>, + basic_sse12_fp_binop_s<0x58, "add", faddwchain, SSE_ALU_ITINS_S>, + basic_sse12_fp_binop_s_int<0x58, "add", SSE_ALU_ITINS_S, 1>; defm MUL : basic_sse12_fp_binop_p<0x59, "mul", fmul, SSE_MUL_ITINS_P>, basic_sse12_fp_binop_s<0x59, "mul", fmul, SSE_MUL_ITINS_S>, basic_sse12_fp_binop_s_int<0x59, "mul", SSE_MUL_ITINS_S>; +let isCodeGenOnly = 1 in +defm MUL_FPE : + basic_sse12_fp_binop_p<0x59, "mul", fmulwchain, SSE_MUL_ITINS_P>, + basic_sse12_fp_binop_s<0x59, "mul", fmulwchain, SSE_MUL_ITINS_S>, + basic_sse12_fp_binop_s_int<0x59, "mul", SSE_MUL_ITINS_S, 1>; let isCommutable = 0 in { defm SUB : basic_sse12_fp_binop_p<0x5C, "sub", fsub, SSE_ALU_ITINS_P>, basic_sse12_fp_binop_s<0x5C, "sub", fsub, SSE_ALU_ITINS_S>, basic_sse12_fp_binop_s_int<0x5C, "sub", SSE_ALU_ITINS_S>; + let isCodeGenOnly = 1 in + defm SUB_FPE : + basic_sse12_fp_binop_p<0x5C, "sub", fsubwchain, SSE_ALU_ITINS_P>, + basic_sse12_fp_binop_s<0x5C, "sub", fsubwchain, SSE_ALU_ITINS_S>, + basic_sse12_fp_binop_s_int<0x5C, "sub", SSE_ALU_ITINS_S, 1>; defm DIV : basic_sse12_fp_binop_p<0x5E, "div", fdiv, SSE_DIV_ITINS_P>, basic_sse12_fp_binop_s<0x5E, "div", fdiv, SSE_DIV_ITINS_S>, basic_sse12_fp_binop_s_int<0x5E, "div", SSE_DIV_ITINS_S>; + let isCodeGenOnly = 1 in + defm 
DIV_FPE : + basic_sse12_fp_binop_p<0x5E, "div", fdivwchain, SSE_DIV_ITINS_P>, + basic_sse12_fp_binop_s<0x5E, "div", fdivwchain, SSE_DIV_ITINS_S>, + basic_sse12_fp_binop_s_int<0x5E, "div", SSE_DIV_ITINS_S, 1>; defm MAX : basic_sse12_fp_binop_p<0x5F, "max", X86fmax, SSE_ALU_ITINS_P>, basic_sse12_fp_binop_s<0x5F, "max", X86fmax, SSE_ALU_ITINS_S>, basic_sse12_fp_binop_s_int<0x5F, "max", SSE_ALU_ITINS_S>; @@ -3228,9 +3248,13 @@ } defm : scalar_math_f32_patterns; +defm : scalar_math_f32_patterns; defm : scalar_math_f32_patterns; +defm : scalar_math_f32_patterns; defm : scalar_math_f32_patterns; +defm : scalar_math_f32_patterns; defm : scalar_math_f32_patterns; +defm : scalar_math_f32_patterns; multiclass scalar_math_f64_patterns { let Predicates = [UseSSE2] in { @@ -3291,9 +3315,13 @@ } defm : scalar_math_f64_patterns; +defm : scalar_math_f64_patterns; defm : scalar_math_f64_patterns; +defm : scalar_math_f64_patterns; defm : scalar_math_f64_patterns; +defm : scalar_math_f64_patterns; defm : scalar_math_f64_patterns; +defm : scalar_math_f64_patterns; /// Unop Arithmetic Index: test/CodeGen/ARM/fpenv-call-order.ll =================================================================== --- /dev/null +++ test/CodeGen/ARM/fpenv-call-order.ll @@ -0,0 +1,34 @@ +; RUN: llc -enable-except-access-fp-math -enable-control-access-fp-math -O0 < %s | FileCheck %s +; RUN: llc -enable-except-access-fp-math -enable-control-access-fp-math < %s | FileCheck %s + +target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64" +target triple = "armv7--linux-gnueabihf" + +; When floating-point environment is not examined, reordering instructions +; doesn't harm, otherwise library call that checks floating-point environment +; state can be moved before the actual computation, producing wrong results (not +; affected by side-effects of operations). 
+ +; CHECK: vadd.f32 +; -CHECK: vsub.f32 +; CHECK: bl function + +; Function Attrs: nounwind +define float @f(float %x, float %y) { +entry: + %res1 = fadd float %x, %y + %res2 = fsub float %x, %res1 + %a = call i32 @function0() + tail call void @function(i32 %a) + ret float %res2 +} + +declare i32 @function0() +declare void @function(i32) + +!llvm.module.flags = !{!0, !1} +!llvm.ident = !{!2} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{i32 1, !"min_enum_size", i32 4} +!2 = !{!""} Index: test/CodeGen/X86/avx2-vbroadcast.ll =================================================================== --- test/CodeGen/X86/avx2-vbroadcast.ll +++ test/CodeGen/X86/avx2-vbroadcast.ll @@ -472,8 +472,8 @@ br i1 undef, label %ret, label %footer329VF footer329VF: - %A.0.inVF = fmul float undef, 6.553600e+04 - %B.0.in407VF = fmul <8 x float> undef, + %A.0.inVF = fmul nrnd nexc float undef, 6.553600e+04 + %B.0.in407VF = fmul nrnd nexc <8 x float> undef, %A.0VF = fptosi float %A.0.inVF to i32 %B.0408VF = fptosi <8 x float> %B.0.in407VF to <8 x i32> %0 = and <8 x i32> %B.0408VF, Index: test/CodeGen/X86/fma_patterns.ll =================================================================== --- test/CodeGen/X86/fma_patterns.ll +++ test/CodeGen/X86/fma_patterns.ll @@ -1130,8 +1130,8 @@ ; AVX512-NEXT: vfnmsub213sd %xmm2, %xmm0, %xmm1 ; AVX512-NEXT: vmovaps %zmm1, %zmm0 ; AVX512-NEXT: retq - %m = fmul nsz double %x, %y - %n = fsub double -0.0, %m + %m = fmul nrnd nexc nsz double %x, %y + %n = fsub nrnd nexc double -0.0, %m ret double %n } @@ -1153,8 +1153,8 @@ ; AVX512-NEXT: vxorps %xmm2, %xmm2, %xmm2 ; AVX512-NEXT: vfnmsub213ps %xmm2, %xmm1, %xmm0 ; AVX512-NEXT: retq - %m = fmul nsz <4 x float> %x, %y - %n = fsub <4 x float> , %m + %m = fmul nrnd nexc nsz <4 x float> %x, %y + %n = fsub nrnd nexc <4 x float> , %m ret <4 x float> %n } @@ -1176,8 +1176,8 @@ ; AVX512-NEXT: vxorps %ymm2, %ymm2, %ymm2 ; AVX512-NEXT: vfnmsub213pd %ymm2, %ymm1, %ymm0 ; AVX512-NEXT: retq - %m = fmul nsz <4 x double> %x, 
%y - %n = fsub <4 x double> , %m + %m = fmul nrnd nexc nsz <4 x double> %x, %y + %n = fsub nrnd nexc <4 x double> , %m ret <4 x double> %n } @@ -1187,8 +1187,8 @@ ; ALL-NEXT: vmulpd %ymm1, %ymm0, %ymm0 ; ALL-NEXT: vxorpd {{.*}}(%rip), %ymm0, %ymm0 ; ALL-NEXT: retq - %m = fmul <4 x double> %x, %y - %n = fsub <4 x double> , %m + %m = fmul nrnd nexc <4 x double> %x, %y + %n = fsub nrnd nexc <4 x double> , %m ret <4 x double> %n } Index: test/CodeGen/X86/fma_patterns_wide.ll =================================================================== --- test/CodeGen/X86/fma_patterns_wide.ll +++ test/CodeGen/X86/fma_patterns_wide.ll @@ -759,8 +759,8 @@ ; AVX512-NEXT: vpxord %zmm2, %zmm2, %zmm2 ; AVX512-NEXT: vfnmsub213ps %zmm2, %zmm1, %zmm0 ; AVX512-NEXT: retq - %m = fmul nsz <16 x float> %x, %y - %n = fsub <16 x float> , %m + %m = fmul nrnd nexc nsz <16 x float> %x, %y + %n = fsub nrnd nexc <16 x float> , %m ret <16 x float> %n } @@ -784,8 +784,8 @@ ; AVX512-NEXT: vpxord %zmm2, %zmm2, %zmm2 ; AVX512-NEXT: vfnmsub213pd %zmm2, %zmm1, %zmm0 ; AVX512-NEXT: retq - %m = fmul nsz <8 x double> %x, %y - %n = fsub <8 x double> , %m + %m = fmul nrnd nexc nsz <8 x double> %x, %y + %n = fsub nrnd nexc <8 x double> , %m ret <8 x double> %n } @@ -813,8 +813,8 @@ ; AVX512-NEXT: vmulpd %zmm1, %zmm0, %zmm0 ; AVX512-NEXT: vxorpd {{.*}}(%rip), %zmm0, %zmm0 ; AVX512-NEXT: retq - %m = fmul <8 x double> %x, %y - %n = fsub <8 x double> , %m + %m = fmul nrnd nexc <8 x double> %x, %y + %n = fsub nrnd nexc <8 x double> , %m ret <8 x double> %n }