diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -140,20 +140,22 @@ std::pair<SDValue, SDValue> ExpandChainLibCall(RTLIB::Libcall LC, SDNode *Node, bool isSigned); - SDValue ExpandFPLibCall(SDNode *Node, RTLIB::Libcall Call_F32, - RTLIB::Libcall Call_F64, RTLIB::Libcall Call_F80, - RTLIB::Libcall Call_F128, - RTLIB::Libcall Call_PPCF128); + void ExpandFPLibCall(SDNode *Node, RTLIB::Libcall Call_F32, + RTLIB::Libcall Call_F64, RTLIB::Libcall Call_F80, + RTLIB::Libcall Call_F128, + RTLIB::Libcall Call_PPCF128, + SmallVectorImpl<SDValue> &Results); SDValue ExpandIntLibCall(SDNode *Node, bool isSigned, RTLIB::Libcall Call_I8, RTLIB::Libcall Call_I16, RTLIB::Libcall Call_I32, RTLIB::Libcall Call_I64, RTLIB::Libcall Call_I128); - SDValue ExpandArgFPLibCall(SDNode *Node, - RTLIB::Libcall Call_F32, RTLIB::Libcall Call_F64, - RTLIB::Libcall Call_F80, RTLIB::Libcall Call_F128, - RTLIB::Libcall Call_PPCF128); + void ExpandArgFPLibCall(SDNode *Node, + RTLIB::Libcall Call_F32, RTLIB::Libcall Call_F64, + RTLIB::Libcall Call_F80, RTLIB::Libcall Call_F128, + RTLIB::Libcall Call_PPCF128, + SmallVectorImpl<SDValue> &Results); void ExpandDivRemLibCall(SDNode *Node, SmallVectorImpl<SDValue> &Results); void ExpandSinCosLibCall(SDNode *Node, SmallVectorImpl<SDValue> &Results); @@ -2117,15 +2119,13 @@ return CallInfo; } -SDValue SelectionDAGLegalize::ExpandFPLibCall(SDNode* Node, - RTLIB::Libcall Call_F32, - RTLIB::Libcall Call_F64, - RTLIB::Libcall Call_F80, - RTLIB::Libcall Call_F128, - RTLIB::Libcall Call_PPCF128) { - if (Node->isStrictFPOpcode()) - Node = DAG.mutateStrictFPToFP(Node); - +void SelectionDAGLegalize::ExpandFPLibCall(SDNode* Node, + RTLIB::Libcall Call_F32, + RTLIB::Libcall Call_F64, + RTLIB::Libcall Call_F80, + RTLIB::Libcall Call_F128, + RTLIB::Libcall Call_PPCF128, + SmallVectorImpl<SDValue> &Results) { RTLIB::Libcall LC; switch (Node->getSimpleValueType(0).SimpleTy) 
{ default: llvm_unreachable("Unexpected request for libcall!"); @@ -2135,7 +2135,16 @@ case MVT::f128: LC = Call_F128; break; case MVT::ppcf128: LC = Call_PPCF128; break; } - return ExpandLibCall(LC, Node, false); + + if (Node->isStrictFPOpcode()) { + // FIXME: This doesn't support tail calls. + std::pair<SDValue, SDValue> Tmp = ExpandChainLibCall(LC, Node, false); + Results.push_back(Tmp.first); + Results.push_back(Tmp.second); + } else { + SDValue Tmp = ExpandLibCall(LC, Node, false); + Results.push_back(Tmp); + } } SDValue SelectionDAGLegalize::ExpandIntLibCall(SDNode* Node, bool isSigned, @@ -2158,17 +2167,17 @@ /// Expand the node to a libcall based on first argument type (for instance /// lround and its variant). -SDValue SelectionDAGLegalize::ExpandArgFPLibCall(SDNode* Node, - RTLIB::Libcall Call_F32, - RTLIB::Libcall Call_F64, - RTLIB::Libcall Call_F80, - RTLIB::Libcall Call_F128, - RTLIB::Libcall Call_PPCF128) { - if (Node->isStrictFPOpcode()) - Node = DAG.mutateStrictFPToFP(Node); +void SelectionDAGLegalize::ExpandArgFPLibCall(SDNode* Node, + RTLIB::Libcall Call_F32, + RTLIB::Libcall Call_F64, + RTLIB::Libcall Call_F80, + RTLIB::Libcall Call_F128, + RTLIB::Libcall Call_PPCF128, + SmallVectorImpl<SDValue> &Results) { + EVT InVT = Node->getOperand(Node->isStrictFPOpcode() ? 1 : 0).getValueType(); RTLIB::Libcall LC; - switch (Node->getOperand(0).getValueType().getSimpleVT().SimpleTy) { + switch (InVT.getSimpleVT().SimpleTy) { default: llvm_unreachable("Unexpected request for libcall!"); case MVT::f32: LC = Call_F32; break; case MVT::f64: LC = Call_F64; break; @@ -2177,7 +2186,15 @@ case MVT::ppcf128: LC = Call_PPCF128; break; } - return ExpandLibCall(LC, Node, false); + if (Node->isStrictFPOpcode()) { + // FIXME: This doesn't support tail calls. 
+ std::pair<SDValue, SDValue> Tmp = ExpandChainLibCall(LC, Node, false); + Results.push_back(Tmp.first); + Results.push_back(Tmp.second); + } else { + SDValue Tmp = ExpandLibCall(LC, Node, false); + Results.push_back(Tmp); + } } /// Issue libcalls to __{u}divmod to compute div / rem pairs. @@ -3818,38 +3835,38 @@ } case ISD::FMINNUM: case ISD::STRICT_FMINNUM: - Results.push_back(ExpandFPLibCall(Node, RTLIB::FMIN_F32, RTLIB::FMIN_F64, - RTLIB::FMIN_F80, RTLIB::FMIN_F128, - RTLIB::FMIN_PPCF128)); + ExpandFPLibCall(Node, RTLIB::FMIN_F32, RTLIB::FMIN_F64, + RTLIB::FMIN_F80, RTLIB::FMIN_F128, + RTLIB::FMIN_PPCF128, Results); break; case ISD::FMAXNUM: case ISD::STRICT_FMAXNUM: - Results.push_back(ExpandFPLibCall(Node, RTLIB::FMAX_F32, RTLIB::FMAX_F64, - RTLIB::FMAX_F80, RTLIB::FMAX_F128, - RTLIB::FMAX_PPCF128)); + ExpandFPLibCall(Node, RTLIB::FMAX_F32, RTLIB::FMAX_F64, + RTLIB::FMAX_F80, RTLIB::FMAX_F128, + RTLIB::FMAX_PPCF128, Results); break; case ISD::FSQRT: case ISD::STRICT_FSQRT: - Results.push_back(ExpandFPLibCall(Node, RTLIB::SQRT_F32, RTLIB::SQRT_F64, - RTLIB::SQRT_F80, RTLIB::SQRT_F128, - RTLIB::SQRT_PPCF128)); + ExpandFPLibCall(Node, RTLIB::SQRT_F32, RTLIB::SQRT_F64, + RTLIB::SQRT_F80, RTLIB::SQRT_F128, + RTLIB::SQRT_PPCF128, Results); break; case ISD::FCBRT: - Results.push_back(ExpandFPLibCall(Node, RTLIB::CBRT_F32, RTLIB::CBRT_F64, - RTLIB::CBRT_F80, RTLIB::CBRT_F128, - RTLIB::CBRT_PPCF128)); + ExpandFPLibCall(Node, RTLIB::CBRT_F32, RTLIB::CBRT_F64, + RTLIB::CBRT_F80, RTLIB::CBRT_F128, + RTLIB::CBRT_PPCF128, Results); break; case ISD::FSIN: case ISD::STRICT_FSIN: - Results.push_back(ExpandFPLibCall(Node, RTLIB::SIN_F32, RTLIB::SIN_F64, - RTLIB::SIN_F80, RTLIB::SIN_F128, - RTLIB::SIN_PPCF128)); + ExpandFPLibCall(Node, RTLIB::SIN_F32, RTLIB::SIN_F64, + RTLIB::SIN_F80, RTLIB::SIN_F128, + RTLIB::SIN_PPCF128, Results); break; case ISD::FCOS: case ISD::STRICT_FCOS: - Results.push_back(ExpandFPLibCall(Node, RTLIB::COS_F32, RTLIB::COS_F64, - RTLIB::COS_F80, RTLIB::COS_F128, - 
RTLIB::COS_PPCF128)); + ExpandFPLibCall(Node, RTLIB::COS_F32, RTLIB::COS_F64, + RTLIB::COS_F80, RTLIB::COS_F128, + RTLIB::COS_PPCF128, Results); break; case ISD::FSINCOS: // Expand into sincos libcall. @@ -3858,107 +3875,107 @@ case ISD::FLOG: case ISD::STRICT_FLOG: if (CanUseFiniteLibCall && DAG.getLibInfo().has(LibFunc_log_finite)) - Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG_FINITE_F32, - RTLIB::LOG_FINITE_F64, - RTLIB::LOG_FINITE_F80, - RTLIB::LOG_FINITE_F128, - RTLIB::LOG_FINITE_PPCF128)); + ExpandFPLibCall(Node, RTLIB::LOG_FINITE_F32, + RTLIB::LOG_FINITE_F64, + RTLIB::LOG_FINITE_F80, + RTLIB::LOG_FINITE_F128, + RTLIB::LOG_FINITE_PPCF128, Results); else - Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG_F32, RTLIB::LOG_F64, - RTLIB::LOG_F80, RTLIB::LOG_F128, - RTLIB::LOG_PPCF128)); + ExpandFPLibCall(Node, RTLIB::LOG_F32, RTLIB::LOG_F64, + RTLIB::LOG_F80, RTLIB::LOG_F128, + RTLIB::LOG_PPCF128, Results); break; case ISD::FLOG2: case ISD::STRICT_FLOG2: if (CanUseFiniteLibCall && DAG.getLibInfo().has(LibFunc_log2_finite)) - Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG2_FINITE_F32, - RTLIB::LOG2_FINITE_F64, - RTLIB::LOG2_FINITE_F80, - RTLIB::LOG2_FINITE_F128, - RTLIB::LOG2_FINITE_PPCF128)); + ExpandFPLibCall(Node, RTLIB::LOG2_FINITE_F32, + RTLIB::LOG2_FINITE_F64, + RTLIB::LOG2_FINITE_F80, + RTLIB::LOG2_FINITE_F128, + RTLIB::LOG2_FINITE_PPCF128, Results); else - Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG2_F32, RTLIB::LOG2_F64, - RTLIB::LOG2_F80, RTLIB::LOG2_F128, - RTLIB::LOG2_PPCF128)); + ExpandFPLibCall(Node, RTLIB::LOG2_F32, RTLIB::LOG2_F64, + RTLIB::LOG2_F80, RTLIB::LOG2_F128, + RTLIB::LOG2_PPCF128, Results); break; case ISD::FLOG10: case ISD::STRICT_FLOG10: if (CanUseFiniteLibCall && DAG.getLibInfo().has(LibFunc_log10_finite)) - Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG10_FINITE_F32, - RTLIB::LOG10_FINITE_F64, - RTLIB::LOG10_FINITE_F80, - RTLIB::LOG10_FINITE_F128, - RTLIB::LOG10_FINITE_PPCF128)); + ExpandFPLibCall(Node, 
RTLIB::LOG10_FINITE_F32, + RTLIB::LOG10_FINITE_F64, + RTLIB::LOG10_FINITE_F80, + RTLIB::LOG10_FINITE_F128, + RTLIB::LOG10_FINITE_PPCF128, Results); else - Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG10_F32, RTLIB::LOG10_F64, - RTLIB::LOG10_F80, RTLIB::LOG10_F128, - RTLIB::LOG10_PPCF128)); + ExpandFPLibCall(Node, RTLIB::LOG10_F32, RTLIB::LOG10_F64, + RTLIB::LOG10_F80, RTLIB::LOG10_F128, + RTLIB::LOG10_PPCF128, Results); break; case ISD::FEXP: case ISD::STRICT_FEXP: if (CanUseFiniteLibCall && DAG.getLibInfo().has(LibFunc_exp_finite)) - Results.push_back(ExpandFPLibCall(Node, RTLIB::EXP_FINITE_F32, - RTLIB::EXP_FINITE_F64, - RTLIB::EXP_FINITE_F80, - RTLIB::EXP_FINITE_F128, - RTLIB::EXP_FINITE_PPCF128)); + ExpandFPLibCall(Node, RTLIB::EXP_FINITE_F32, + RTLIB::EXP_FINITE_F64, + RTLIB::EXP_FINITE_F80, + RTLIB::EXP_FINITE_F128, + RTLIB::EXP_FINITE_PPCF128, Results); else - Results.push_back(ExpandFPLibCall(Node, RTLIB::EXP_F32, RTLIB::EXP_F64, - RTLIB::EXP_F80, RTLIB::EXP_F128, - RTLIB::EXP_PPCF128)); + ExpandFPLibCall(Node, RTLIB::EXP_F32, RTLIB::EXP_F64, + RTLIB::EXP_F80, RTLIB::EXP_F128, + RTLIB::EXP_PPCF128, Results); break; case ISD::FEXP2: case ISD::STRICT_FEXP2: if (CanUseFiniteLibCall && DAG.getLibInfo().has(LibFunc_exp2_finite)) - Results.push_back(ExpandFPLibCall(Node, RTLIB::EXP2_FINITE_F32, - RTLIB::EXP2_FINITE_F64, - RTLIB::EXP2_FINITE_F80, - RTLIB::EXP2_FINITE_F128, - RTLIB::EXP2_FINITE_PPCF128)); + ExpandFPLibCall(Node, RTLIB::EXP2_FINITE_F32, + RTLIB::EXP2_FINITE_F64, + RTLIB::EXP2_FINITE_F80, + RTLIB::EXP2_FINITE_F128, + RTLIB::EXP2_FINITE_PPCF128, Results); else - Results.push_back(ExpandFPLibCall(Node, RTLIB::EXP2_F32, RTLIB::EXP2_F64, - RTLIB::EXP2_F80, RTLIB::EXP2_F128, - RTLIB::EXP2_PPCF128)); + ExpandFPLibCall(Node, RTLIB::EXP2_F32, RTLIB::EXP2_F64, + RTLIB::EXP2_F80, RTLIB::EXP2_F128, + RTLIB::EXP2_PPCF128, Results); break; case ISD::FTRUNC: case ISD::STRICT_FTRUNC: - Results.push_back(ExpandFPLibCall(Node, RTLIB::TRUNC_F32, RTLIB::TRUNC_F64, 
- RTLIB::TRUNC_F80, RTLIB::TRUNC_F128, - RTLIB::TRUNC_PPCF128)); + ExpandFPLibCall(Node, RTLIB::TRUNC_F32, RTLIB::TRUNC_F64, + RTLIB::TRUNC_F80, RTLIB::TRUNC_F128, + RTLIB::TRUNC_PPCF128, Results); break; case ISD::FFLOOR: case ISD::STRICT_FFLOOR: - Results.push_back(ExpandFPLibCall(Node, RTLIB::FLOOR_F32, RTLIB::FLOOR_F64, - RTLIB::FLOOR_F80, RTLIB::FLOOR_F128, - RTLIB::FLOOR_PPCF128)); + ExpandFPLibCall(Node, RTLIB::FLOOR_F32, RTLIB::FLOOR_F64, + RTLIB::FLOOR_F80, RTLIB::FLOOR_F128, + RTLIB::FLOOR_PPCF128, Results); break; case ISD::FCEIL: case ISD::STRICT_FCEIL: - Results.push_back(ExpandFPLibCall(Node, RTLIB::CEIL_F32, RTLIB::CEIL_F64, - RTLIB::CEIL_F80, RTLIB::CEIL_F128, - RTLIB::CEIL_PPCF128)); + ExpandFPLibCall(Node, RTLIB::CEIL_F32, RTLIB::CEIL_F64, + RTLIB::CEIL_F80, RTLIB::CEIL_F128, + RTLIB::CEIL_PPCF128, Results); break; case ISD::FRINT: case ISD::STRICT_FRINT: - Results.push_back(ExpandFPLibCall(Node, RTLIB::RINT_F32, RTLIB::RINT_F64, - RTLIB::RINT_F80, RTLIB::RINT_F128, - RTLIB::RINT_PPCF128)); + ExpandFPLibCall(Node, RTLIB::RINT_F32, RTLIB::RINT_F64, + RTLIB::RINT_F80, RTLIB::RINT_F128, + RTLIB::RINT_PPCF128, Results); break; case ISD::FNEARBYINT: case ISD::STRICT_FNEARBYINT: - Results.push_back(ExpandFPLibCall(Node, RTLIB::NEARBYINT_F32, - RTLIB::NEARBYINT_F64, - RTLIB::NEARBYINT_F80, - RTLIB::NEARBYINT_F128, - RTLIB::NEARBYINT_PPCF128)); + ExpandFPLibCall(Node, RTLIB::NEARBYINT_F32, + RTLIB::NEARBYINT_F64, + RTLIB::NEARBYINT_F80, + RTLIB::NEARBYINT_F128, + RTLIB::NEARBYINT_PPCF128, Results); break; case ISD::FROUND: case ISD::STRICT_FROUND: - Results.push_back(ExpandFPLibCall(Node, RTLIB::ROUND_F32, - RTLIB::ROUND_F64, - RTLIB::ROUND_F80, - RTLIB::ROUND_F128, - RTLIB::ROUND_PPCF128)); + ExpandFPLibCall(Node, RTLIB::ROUND_F32, + RTLIB::ROUND_F64, + RTLIB::ROUND_F80, + RTLIB::ROUND_F128, + RTLIB::ROUND_PPCF128, Results); break; case ISD::FPOWI: case ISD::STRICT_FPOWI: { @@ -3981,78 +3998,78 @@ Exponent)); break; } - 
Results.push_back(ExpandFPLibCall(Node, RTLIB::POWI_F32, RTLIB::POWI_F64, - RTLIB::POWI_F80, RTLIB::POWI_F128, - RTLIB::POWI_PPCF128)); + ExpandFPLibCall(Node, RTLIB::POWI_F32, RTLIB::POWI_F64, + RTLIB::POWI_F80, RTLIB::POWI_F128, + RTLIB::POWI_PPCF128, Results); break; } case ISD::FPOW: case ISD::STRICT_FPOW: if (CanUseFiniteLibCall && DAG.getLibInfo().has(LibFunc_pow_finite)) - Results.push_back(ExpandFPLibCall(Node, RTLIB::POW_FINITE_F32, - RTLIB::POW_FINITE_F64, - RTLIB::POW_FINITE_F80, - RTLIB::POW_FINITE_F128, - RTLIB::POW_FINITE_PPCF128)); + ExpandFPLibCall(Node, RTLIB::POW_FINITE_F32, + RTLIB::POW_FINITE_F64, + RTLIB::POW_FINITE_F80, + RTLIB::POW_FINITE_F128, + RTLIB::POW_FINITE_PPCF128, Results); else - Results.push_back(ExpandFPLibCall(Node, RTLIB::POW_F32, RTLIB::POW_F64, - RTLIB::POW_F80, RTLIB::POW_F128, - RTLIB::POW_PPCF128)); + ExpandFPLibCall(Node, RTLIB::POW_F32, RTLIB::POW_F64, + RTLIB::POW_F80, RTLIB::POW_F128, + RTLIB::POW_PPCF128, Results); break; case ISD::LROUND: case ISD::STRICT_LROUND: - Results.push_back(ExpandArgFPLibCall(Node, RTLIB::LROUND_F32, - RTLIB::LROUND_F64, RTLIB::LROUND_F80, - RTLIB::LROUND_F128, - RTLIB::LROUND_PPCF128)); + ExpandArgFPLibCall(Node, RTLIB::LROUND_F32, + RTLIB::LROUND_F64, RTLIB::LROUND_F80, + RTLIB::LROUND_F128, + RTLIB::LROUND_PPCF128, Results); break; case ISD::LLROUND: case ISD::STRICT_LLROUND: - Results.push_back(ExpandArgFPLibCall(Node, RTLIB::LLROUND_F32, - RTLIB::LLROUND_F64, RTLIB::LLROUND_F80, - RTLIB::LLROUND_F128, - RTLIB::LLROUND_PPCF128)); + ExpandArgFPLibCall(Node, RTLIB::LLROUND_F32, + RTLIB::LLROUND_F64, RTLIB::LLROUND_F80, + RTLIB::LLROUND_F128, + RTLIB::LLROUND_PPCF128, Results); break; case ISD::LRINT: case ISD::STRICT_LRINT: - Results.push_back(ExpandArgFPLibCall(Node, RTLIB::LRINT_F32, - RTLIB::LRINT_F64, RTLIB::LRINT_F80, - RTLIB::LRINT_F128, - RTLIB::LRINT_PPCF128)); + ExpandArgFPLibCall(Node, RTLIB::LRINT_F32, + RTLIB::LRINT_F64, RTLIB::LRINT_F80, + RTLIB::LRINT_F128, + 
RTLIB::LRINT_PPCF128, Results); break; case ISD::LLRINT: case ISD::STRICT_LLRINT: - Results.push_back(ExpandArgFPLibCall(Node, RTLIB::LLRINT_F32, - RTLIB::LLRINT_F64, RTLIB::LLRINT_F80, - RTLIB::LLRINT_F128, - RTLIB::LLRINT_PPCF128)); + ExpandArgFPLibCall(Node, RTLIB::LLRINT_F32, + RTLIB::LLRINT_F64, RTLIB::LLRINT_F80, + RTLIB::LLRINT_F128, + RTLIB::LLRINT_PPCF128, Results); break; case ISD::FDIV: - Results.push_back(ExpandFPLibCall(Node, RTLIB::DIV_F32, RTLIB::DIV_F64, - RTLIB::DIV_F80, RTLIB::DIV_F128, - RTLIB::DIV_PPCF128)); + ExpandFPLibCall(Node, RTLIB::DIV_F32, RTLIB::DIV_F64, + RTLIB::DIV_F80, RTLIB::DIV_F128, + RTLIB::DIV_PPCF128, Results); break; case ISD::FREM: case ISD::STRICT_FREM: - Results.push_back(ExpandFPLibCall(Node, RTLIB::REM_F32, RTLIB::REM_F64, - RTLIB::REM_F80, RTLIB::REM_F128, - RTLIB::REM_PPCF128)); + ExpandFPLibCall(Node, RTLIB::REM_F32, RTLIB::REM_F64, + RTLIB::REM_F80, RTLIB::REM_F128, + RTLIB::REM_PPCF128, Results); break; case ISD::FMA: case ISD::STRICT_FMA: - Results.push_back(ExpandFPLibCall(Node, RTLIB::FMA_F32, RTLIB::FMA_F64, - RTLIB::FMA_F80, RTLIB::FMA_F128, - RTLIB::FMA_PPCF128)); + ExpandFPLibCall(Node, RTLIB::FMA_F32, RTLIB::FMA_F64, + RTLIB::FMA_F80, RTLIB::FMA_F128, + RTLIB::FMA_PPCF128, Results); break; case ISD::FADD: - Results.push_back(ExpandFPLibCall(Node, RTLIB::ADD_F32, RTLIB::ADD_F64, - RTLIB::ADD_F80, RTLIB::ADD_F128, - RTLIB::ADD_PPCF128)); + ExpandFPLibCall(Node, RTLIB::ADD_F32, RTLIB::ADD_F64, + RTLIB::ADD_F80, RTLIB::ADD_F128, + RTLIB::ADD_PPCF128, Results); break; case ISD::FMUL: - Results.push_back(ExpandFPLibCall(Node, RTLIB::MUL_F32, RTLIB::MUL_F64, - RTLIB::MUL_F80, RTLIB::MUL_F128, - RTLIB::MUL_PPCF128)); + ExpandFPLibCall(Node, RTLIB::MUL_F32, RTLIB::MUL_F64, + RTLIB::MUL_F80, RTLIB::MUL_F128, + RTLIB::MUL_PPCF128, Results); break; case ISD::FP16_TO_FP: if (Node->getValueType(0) == MVT::f32) { @@ -4067,9 +4084,9 @@ break; } case ISD::FSUB: - Results.push_back(ExpandFPLibCall(Node, RTLIB::SUB_F32, 
RTLIB::SUB_F64, - RTLIB::SUB_F80, RTLIB::SUB_F128, - RTLIB::SUB_PPCF128)); + ExpandFPLibCall(Node, RTLIB::SUB_F32, RTLIB::SUB_F64, + RTLIB::SUB_F80, RTLIB::SUB_F128, + RTLIB::SUB_PPCF128, Results); break; case ISD::SREM: Results.push_back(ExpandIntLibCall(Node, true, diff --git a/llvm/test/CodeGen/SystemZ/vector-constrained-fp-intrinsics.ll b/llvm/test/CodeGen/SystemZ/vector-constrained-fp-intrinsics.ll --- a/llvm/test/CodeGen/SystemZ/vector-constrained-fp-intrinsics.ll +++ b/llvm/test/CodeGen/SystemZ/vector-constrained-fp-intrinsics.ll @@ -244,8 +244,7 @@ ; S390X-NEXT: ldr %f0, %f1 ; S390X-NEXT: ldr %f2, %f8 ; S390X-NEXT: brasl %r14, fmod@PLT -; S390X-NEXT: ldr %f2, %f0 -; S390X-NEXT: ldr %f0, %f9 +; S390X-NEXT: ldr %f2, %f9 ; S390X-NEXT: ld %f8, 168(%r15) # 8-byte Folded Reload ; S390X-NEXT: ld %f9, 160(%r15) # 8-byte Folded Reload ; S390X-NEXT: lmg %r14, %r15, 288(%r15) @@ -317,9 +316,8 @@ ; S390X-NEXT: ler %f0, %f1 ; S390X-NEXT: ler %f2, %f8 ; S390X-NEXT: brasl %r14, fmodf@PLT -; S390X-NEXT: ler %f4, %f0 -; S390X-NEXT: ler %f0, %f9 ; S390X-NEXT: ler %f2, %f10 +; S390X-NEXT: ler %f4, %f9 ; S390X-NEXT: ld %f8, 176(%r15) # 8-byte Folded Reload ; S390X-NEXT: ld %f9, 168(%r15) # 8-byte Folded Reload ; S390X-NEXT: ld %f10, 160(%r15) # 8-byte Folded Reload @@ -501,10 +499,9 @@ ; S390X-NEXT: ldr %f0, %f1 ; S390X-NEXT: ldr %f2, %f8 ; S390X-NEXT: brasl %r14, fmod@PLT -; S390X-NEXT: ldr %f6, %f0 -; S390X-NEXT: ldr %f0, %f9 -; S390X-NEXT: ldr %f2, %f10 -; S390X-NEXT: ldr %f4, %f11 +; S390X-NEXT: ldr %f2, %f11 +; S390X-NEXT: ldr %f4, %f10 +; S390X-NEXT: ldr %f6, %f9 ; S390X-NEXT: ld %f8, 184(%r15) # 8-byte Folded Reload ; S390X-NEXT: ld %f9, 176(%r15) # 8-byte Folded Reload ; S390X-NEXT: ld %f10, 168(%r15) # 8-byte Folded Reload @@ -1288,8 +1285,7 @@ ; S390X-NEXT: ldr %f0, %f1 ; S390X-NEXT: ldr %f2, %f8 ; S390X-NEXT: brasl %r14, pow@PLT -; S390X-NEXT: ldr %f2, %f0 -; S390X-NEXT: ldr %f0, %f9 +; S390X-NEXT: ldr %f2, %f9 ; S390X-NEXT: ld %f8, 168(%r15) # 8-byte Folded Reload 
; S390X-NEXT: ld %f9, 160(%r15) # 8-byte Folded Reload ; S390X-NEXT: lmg %r14, %r15, 288(%r15) @@ -1363,9 +1359,8 @@ ; S390X-NEXT: ler %f0, %f1 ; S390X-NEXT: ler %f2, %f8 ; S390X-NEXT: brasl %r14, powf@PLT -; S390X-NEXT: ler %f4, %f0 -; S390X-NEXT: ler %f0, %f9 ; S390X-NEXT: ler %f2, %f10 +; S390X-NEXT: ler %f4, %f9 ; S390X-NEXT: ld %f8, 176(%r15) # 8-byte Folded Reload ; S390X-NEXT: ld %f9, 168(%r15) # 8-byte Folded Reload ; S390X-NEXT: ld %f10, 160(%r15) # 8-byte Folded Reload @@ -1553,10 +1548,9 @@ ; S390X-NEXT: ldr %f0, %f1 ; S390X-NEXT: ldr %f2, %f8 ; S390X-NEXT: brasl %r14, pow@PLT -; S390X-NEXT: ldr %f6, %f0 -; S390X-NEXT: ldr %f0, %f9 -; S390X-NEXT: ldr %f2, %f10 -; S390X-NEXT: ldr %f4, %f11 +; S390X-NEXT: ldr %f2, %f11 +; S390X-NEXT: ldr %f4, %f10 +; S390X-NEXT: ldr %f6, %f9 ; S390X-NEXT: ld %f8, 184(%r15) # 8-byte Folded Reload ; S390X-NEXT: ld %f9, 176(%r15) # 8-byte Folded Reload ; S390X-NEXT: ld %f10, 168(%r15) # 8-byte Folded Reload @@ -1676,8 +1670,7 @@ ; S390X-NEXT: lghi %r2, 3 ; S390X-NEXT: ldr %f0, %f1 ; S390X-NEXT: brasl %r14, __powidf2@PLT -; S390X-NEXT: ldr %f2, %f0 -; S390X-NEXT: ldr %f0, %f8 +; S390X-NEXT: ldr %f2, %f8 ; S390X-NEXT: ld %f8, 160(%r15) # 8-byte Folded Reload ; S390X-NEXT: lmg %r14, %r15, 280(%r15) ; S390X-NEXT: br %r14 @@ -1741,9 +1734,8 @@ ; S390X-NEXT: lghi %r2, 3 ; S390X-NEXT: ler %f0, %f1 ; S390X-NEXT: brasl %r14, __powisf2@PLT -; S390X-NEXT: ler %f4, %f0 -; S390X-NEXT: ler %f0, %f8 ; S390X-NEXT: ler %f2, %f9 +; S390X-NEXT: ler %f4, %f8 ; S390X-NEXT: ld %f8, 168(%r15) # 8-byte Folded Reload ; S390X-NEXT: ld %f9, 160(%r15) # 8-byte Folded Reload ; S390X-NEXT: lmg %r14, %r15, 288(%r15) @@ -1803,12 +1795,12 @@ ; S390X-NEXT: .cfi_offset %f8, -168 ; S390X-NEXT: .cfi_offset %f9, -176 ; S390X-NEXT: larl %r1, .LCPI38_0 -; S390X-NEXT: ld %f0, 0(%r1) +; S390X-NEXT: ldeb %f0, 0(%r1) ; S390X-NEXT: lgr %r13, %r2 ; S390X-NEXT: lghi %r2, 3 ; S390X-NEXT: brasl %r14, __powidf2@PLT ; S390X-NEXT: larl %r1, .LCPI38_1 -; S390X-NEXT: ldeb %f1, 
0(%r1) +; S390X-NEXT: ld %f1, 0(%r1) ; S390X-NEXT: ldr %f8, %f0 ; S390X-NEXT: lghi %r2, 3 ; S390X-NEXT: ldr %f0, %f1 @@ -1819,9 +1811,9 @@ ; S390X-NEXT: lghi %r2, 3 ; S390X-NEXT: ldr %f0, %f1 ; S390X-NEXT: brasl %r14, __powidf2@PLT -; S390X-NEXT: std %f0, 8(%r13) -; S390X-NEXT: std %f9, 0(%r13) -; S390X-NEXT: std %f8, 16(%r13) +; S390X-NEXT: std %f0, 16(%r13) +; S390X-NEXT: std %f9, 8(%r13) +; S390X-NEXT: std %f8, 0(%r13) ; S390X-NEXT: ld %f8, 168(%r15) # 8-byte Folded Reload ; S390X-NEXT: ld %f9, 160(%r15) # 8-byte Folded Reload ; S390X-NEXT: lmg %r13, %r15, 280(%r15) @@ -1906,10 +1898,9 @@ ; S390X-NEXT: lghi %r2, 3 ; S390X-NEXT: ldr %f0, %f1 ; S390X-NEXT: brasl %r14, __powidf2@PLT -; S390X-NEXT: ldr %f6, %f0 -; S390X-NEXT: ldr %f0, %f8 -; S390X-NEXT: ldr %f2, %f9 -; S390X-NEXT: ldr %f4, %f10 +; S390X-NEXT: ldr %f2, %f10 +; S390X-NEXT: ldr %f4, %f9 +; S390X-NEXT: ldr %f6, %f8 ; S390X-NEXT: ld %f8, 176(%r15) # 8-byte Folded Reload ; S390X-NEXT: ld %f9, 168(%r15) # 8-byte Folded Reload ; S390X-NEXT: ld %f10, 160(%r15) # 8-byte Folded Reload @@ -2010,15 +2001,14 @@ ; S390X-NEXT: std %f8, 160(%r15) # 8-byte Folded Spill ; S390X-NEXT: .cfi_offset %f8, -168 ; S390X-NEXT: larl %r1, .LCPI41_0 -; S390X-NEXT: ldeb %f0, 0(%r1) +; S390X-NEXT: ld %f0, 0(%r1) ; S390X-NEXT: brasl %r14, sin@PLT ; S390X-NEXT: larl %r1, .LCPI41_1 -; S390X-NEXT: ld %f1, 0(%r1) +; S390X-NEXT: ldeb %f1, 0(%r1) ; S390X-NEXT: ldr %f8, %f0 ; S390X-NEXT: ldr %f0, %f1 ; S390X-NEXT: brasl %r14, sin@PLT -; S390X-NEXT: ldr %f2, %f0 -; S390X-NEXT: ldr %f0, %f8 +; S390X-NEXT: ldr %f2, %f8 ; S390X-NEXT: ld %f8, 160(%r15) # 8-byte Folded Reload ; S390X-NEXT: lmg %r14, %r15, 280(%r15) ; S390X-NEXT: br %r14 @@ -2076,9 +2066,8 @@ ; S390X-NEXT: ler %f9, %f0 ; S390X-NEXT: ler %f0, %f1 ; S390X-NEXT: brasl %r14, sinf@PLT -; S390X-NEXT: ler %f4, %f0 -; S390X-NEXT: ler %f0, %f8 ; S390X-NEXT: ler %f2, %f9 +; S390X-NEXT: ler %f4, %f8 ; S390X-NEXT: ld %f8, 168(%r15) # 8-byte Folded Reload ; S390X-NEXT: ld %f9, 160(%r15) # 
8-byte Folded Reload ; S390X-NEXT: lmg %r14, %r15, 288(%r15) @@ -2214,7 +2203,7 @@ ; S390X-NEXT: .cfi_offset %f9, -176 ; S390X-NEXT: .cfi_offset %f10, -184 ; S390X-NEXT: larl %r1, .LCPI44_0 -; S390X-NEXT: ldeb %f0, 0(%r1) +; S390X-NEXT: ld %f0, 0(%r1) ; S390X-NEXT: brasl %r14, sin@PLT ; S390X-NEXT: larl %r1, .LCPI44_1 ; S390X-NEXT: ld %f1, 0(%r1) @@ -2227,14 +2216,13 @@ ; S390X-NEXT: ldr %f0, %f1 ; S390X-NEXT: brasl %r14, sin@PLT ; S390X-NEXT: larl %r1, .LCPI44_3 -; S390X-NEXT: ld %f1, 0(%r1) +; S390X-NEXT: ldeb %f1, 0(%r1) ; S390X-NEXT: ldr %f10, %f0 ; S390X-NEXT: ldr %f0, %f1 ; S390X-NEXT: brasl %r14, sin@PLT -; S390X-NEXT: ldr %f6, %f0 -; S390X-NEXT: ldr %f0, %f8 -; S390X-NEXT: ldr %f2, %f9 -; S390X-NEXT: ldr %f4, %f10 +; S390X-NEXT: ldr %f2, %f10 +; S390X-NEXT: ldr %f4, %f9 +; S390X-NEXT: ldr %f6, %f8 ; S390X-NEXT: ld %f8, 176(%r15) # 8-byte Folded Reload ; S390X-NEXT: ld %f9, 168(%r15) # 8-byte Folded Reload ; S390X-NEXT: ld %f10, 160(%r15) # 8-byte Folded Reload @@ -2330,15 +2318,14 @@ ; S390X-NEXT: std %f8, 160(%r15) # 8-byte Folded Spill ; S390X-NEXT: .cfi_offset %f8, -168 ; S390X-NEXT: larl %r1, .LCPI46_0 -; S390X-NEXT: ldeb %f0, 0(%r1) +; S390X-NEXT: ld %f0, 0(%r1) ; S390X-NEXT: brasl %r14, cos@PLT ; S390X-NEXT: larl %r1, .LCPI46_1 -; S390X-NEXT: ld %f1, 0(%r1) +; S390X-NEXT: ldeb %f1, 0(%r1) ; S390X-NEXT: ldr %f8, %f0 ; S390X-NEXT: ldr %f0, %f1 ; S390X-NEXT: brasl %r14, cos@PLT -; S390X-NEXT: ldr %f2, %f0 -; S390X-NEXT: ldr %f0, %f8 +; S390X-NEXT: ldr %f2, %f8 ; S390X-NEXT: ld %f8, 160(%r15) # 8-byte Folded Reload ; S390X-NEXT: lmg %r14, %r15, 280(%r15) ; S390X-NEXT: br %r14 @@ -2396,9 +2383,8 @@ ; S390X-NEXT: ler %f9, %f0 ; S390X-NEXT: ler %f0, %f1 ; S390X-NEXT: brasl %r14, cosf@PLT -; S390X-NEXT: ler %f4, %f0 -; S390X-NEXT: ler %f0, %f8 ; S390X-NEXT: ler %f2, %f9 +; S390X-NEXT: ler %f4, %f8 ; S390X-NEXT: ld %f8, 168(%r15) # 8-byte Folded Reload ; S390X-NEXT: ld %f9, 160(%r15) # 8-byte Folded Reload ; S390X-NEXT: lmg %r14, %r15, 288(%r15) @@ -2534,7 
+2520,7 @@ ; S390X-NEXT: .cfi_offset %f9, -176 ; S390X-NEXT: .cfi_offset %f10, -184 ; S390X-NEXT: larl %r1, .LCPI49_0 -; S390X-NEXT: ldeb %f0, 0(%r1) +; S390X-NEXT: ld %f0, 0(%r1) ; S390X-NEXT: brasl %r14, cos@PLT ; S390X-NEXT: larl %r1, .LCPI49_1 ; S390X-NEXT: ld %f1, 0(%r1) @@ -2547,14 +2533,13 @@ ; S390X-NEXT: ldr %f0, %f1 ; S390X-NEXT: brasl %r14, cos@PLT ; S390X-NEXT: larl %r1, .LCPI49_3 -; S390X-NEXT: ld %f1, 0(%r1) +; S390X-NEXT: ldeb %f1, 0(%r1) ; S390X-NEXT: ldr %f10, %f0 ; S390X-NEXT: ldr %f0, %f1 ; S390X-NEXT: brasl %r14, cos@PLT -; S390X-NEXT: ldr %f6, %f0 -; S390X-NEXT: ldr %f0, %f8 -; S390X-NEXT: ldr %f2, %f9 -; S390X-NEXT: ldr %f4, %f10 +; S390X-NEXT: ldr %f2, %f10 +; S390X-NEXT: ldr %f4, %f9 +; S390X-NEXT: ldr %f6, %f8 ; S390X-NEXT: ld %f8, 176(%r15) # 8-byte Folded Reload ; S390X-NEXT: ld %f9, 168(%r15) # 8-byte Folded Reload ; S390X-NEXT: ld %f10, 160(%r15) # 8-byte Folded Reload @@ -2650,15 +2635,14 @@ ; S390X-NEXT: std %f8, 160(%r15) # 8-byte Folded Spill ; S390X-NEXT: .cfi_offset %f8, -168 ; S390X-NEXT: larl %r1, .LCPI51_0 -; S390X-NEXT: ldeb %f0, 0(%r1) +; S390X-NEXT: ld %f0, 0(%r1) ; S390X-NEXT: brasl %r14, exp@PLT ; S390X-NEXT: larl %r1, .LCPI51_1 -; S390X-NEXT: ld %f1, 0(%r1) +; S390X-NEXT: ldeb %f1, 0(%r1) ; S390X-NEXT: ldr %f8, %f0 ; S390X-NEXT: ldr %f0, %f1 ; S390X-NEXT: brasl %r14, exp@PLT -; S390X-NEXT: ldr %f2, %f0 -; S390X-NEXT: ldr %f0, %f8 +; S390X-NEXT: ldr %f2, %f8 ; S390X-NEXT: ld %f8, 160(%r15) # 8-byte Folded Reload ; S390X-NEXT: lmg %r14, %r15, 280(%r15) ; S390X-NEXT: br %r14 @@ -2716,9 +2700,8 @@ ; S390X-NEXT: ler %f9, %f0 ; S390X-NEXT: ler %f0, %f1 ; S390X-NEXT: brasl %r14, expf@PLT -; S390X-NEXT: ler %f4, %f0 -; S390X-NEXT: ler %f0, %f8 ; S390X-NEXT: ler %f2, %f9 +; S390X-NEXT: ler %f4, %f8 ; S390X-NEXT: ld %f8, 168(%r15) # 8-byte Folded Reload ; S390X-NEXT: ld %f9, 160(%r15) # 8-byte Folded Reload ; S390X-NEXT: lmg %r14, %r15, 288(%r15) @@ -2854,7 +2837,7 @@ ; S390X-NEXT: .cfi_offset %f9, -176 ; S390X-NEXT: .cfi_offset 
%f10, -184 ; S390X-NEXT: larl %r1, .LCPI54_0 -; S390X-NEXT: ldeb %f0, 0(%r1) +; S390X-NEXT: ld %f0, 0(%r1) ; S390X-NEXT: brasl %r14, exp@PLT ; S390X-NEXT: larl %r1, .LCPI54_1 ; S390X-NEXT: ld %f1, 0(%r1) @@ -2867,14 +2850,13 @@ ; S390X-NEXT: ldr %f0, %f1 ; S390X-NEXT: brasl %r14, exp@PLT ; S390X-NEXT: larl %r1, .LCPI54_3 -; S390X-NEXT: ld %f1, 0(%r1) +; S390X-NEXT: ldeb %f1, 0(%r1) ; S390X-NEXT: ldr %f10, %f0 ; S390X-NEXT: ldr %f0, %f1 ; S390X-NEXT: brasl %r14, exp@PLT -; S390X-NEXT: ldr %f6, %f0 -; S390X-NEXT: ldr %f0, %f8 -; S390X-NEXT: ldr %f2, %f9 -; S390X-NEXT: ldr %f4, %f10 +; S390X-NEXT: ldr %f2, %f10 +; S390X-NEXT: ldr %f4, %f9 +; S390X-NEXT: ldr %f6, %f8 ; S390X-NEXT: ld %f8, 176(%r15) # 8-byte Folded Reload ; S390X-NEXT: ld %f9, 168(%r15) # 8-byte Folded Reload ; S390X-NEXT: ld %f10, 160(%r15) # 8-byte Folded Reload @@ -2970,15 +2952,14 @@ ; S390X-NEXT: std %f8, 160(%r15) # 8-byte Folded Spill ; S390X-NEXT: .cfi_offset %f8, -168 ; S390X-NEXT: larl %r1, .LCPI56_0 -; S390X-NEXT: ld %f0, 0(%r1) +; S390X-NEXT: ldeb %f0, 0(%r1) ; S390X-NEXT: brasl %r14, exp2@PLT ; S390X-NEXT: larl %r1, .LCPI56_1 -; S390X-NEXT: ldeb %f1, 0(%r1) +; S390X-NEXT: ld %f1, 0(%r1) ; S390X-NEXT: ldr %f8, %f0 ; S390X-NEXT: ldr %f0, %f1 ; S390X-NEXT: brasl %r14, exp2@PLT -; S390X-NEXT: ldr %f2, %f0 -; S390X-NEXT: ldr %f0, %f8 +; S390X-NEXT: ldr %f2, %f8 ; S390X-NEXT: ld %f8, 160(%r15) # 8-byte Folded Reload ; S390X-NEXT: lmg %r14, %r15, 280(%r15) ; S390X-NEXT: br %r14 @@ -3036,9 +3017,8 @@ ; S390X-NEXT: ler %f9, %f0 ; S390X-NEXT: ler %f0, %f1 ; S390X-NEXT: brasl %r14, exp2f@PLT -; S390X-NEXT: ler %f4, %f0 -; S390X-NEXT: ler %f0, %f8 ; S390X-NEXT: ler %f2, %f9 +; S390X-NEXT: ler %f4, %f8 ; S390X-NEXT: ld %f8, 168(%r15) # 8-byte Folded Reload ; S390X-NEXT: ld %f9, 160(%r15) # 8-byte Folded Reload ; S390X-NEXT: lmg %r14, %r15, 288(%r15) @@ -3191,10 +3171,9 @@ ; S390X-NEXT: ldr %f10, %f0 ; S390X-NEXT: ldr %f0, %f1 ; S390X-NEXT: brasl %r14, exp2@PLT -; S390X-NEXT: ldr %f6, %f0 -; S390X-NEXT: 
ldr %f0, %f8 -; S390X-NEXT: ldr %f2, %f9 -; S390X-NEXT: ldr %f4, %f10 +; S390X-NEXT: ldr %f2, %f10 +; S390X-NEXT: ldr %f4, %f9 +; S390X-NEXT: ldr %f6, %f8 ; S390X-NEXT: ld %f8, 176(%r15) # 8-byte Folded Reload ; S390X-NEXT: ld %f9, 168(%r15) # 8-byte Folded Reload ; S390X-NEXT: ld %f10, 160(%r15) # 8-byte Folded Reload @@ -3290,15 +3269,14 @@ ; S390X-NEXT: std %f8, 160(%r15) # 8-byte Folded Spill ; S390X-NEXT: .cfi_offset %f8, -168 ; S390X-NEXT: larl %r1, .LCPI61_0 -; S390X-NEXT: ldeb %f0, 0(%r1) +; S390X-NEXT: ld %f0, 0(%r1) ; S390X-NEXT: brasl %r14, log@PLT ; S390X-NEXT: larl %r1, .LCPI61_1 -; S390X-NEXT: ld %f1, 0(%r1) +; S390X-NEXT: ldeb %f1, 0(%r1) ; S390X-NEXT: ldr %f8, %f0 ; S390X-NEXT: ldr %f0, %f1 ; S390X-NEXT: brasl %r14, log@PLT -; S390X-NEXT: ldr %f2, %f0 -; S390X-NEXT: ldr %f0, %f8 +; S390X-NEXT: ldr %f2, %f8 ; S390X-NEXT: ld %f8, 160(%r15) # 8-byte Folded Reload ; S390X-NEXT: lmg %r14, %r15, 280(%r15) ; S390X-NEXT: br %r14 @@ -3356,9 +3334,8 @@ ; S390X-NEXT: ler %f9, %f0 ; S390X-NEXT: ler %f0, %f1 ; S390X-NEXT: brasl %r14, logf@PLT -; S390X-NEXT: ler %f4, %f0 -; S390X-NEXT: ler %f0, %f8 ; S390X-NEXT: ler %f2, %f9 +; S390X-NEXT: ler %f4, %f8 ; S390X-NEXT: ld %f8, 168(%r15) # 8-byte Folded Reload ; S390X-NEXT: ld %f9, 160(%r15) # 8-byte Folded Reload ; S390X-NEXT: lmg %r14, %r15, 288(%r15) @@ -3494,7 +3471,7 @@ ; S390X-NEXT: .cfi_offset %f9, -176 ; S390X-NEXT: .cfi_offset %f10, -184 ; S390X-NEXT: larl %r1, .LCPI64_0 -; S390X-NEXT: ldeb %f0, 0(%r1) +; S390X-NEXT: ld %f0, 0(%r1) ; S390X-NEXT: brasl %r14, log@PLT ; S390X-NEXT: larl %r1, .LCPI64_1 ; S390X-NEXT: ld %f1, 0(%r1) @@ -3507,14 +3484,13 @@ ; S390X-NEXT: ldr %f0, %f1 ; S390X-NEXT: brasl %r14, log@PLT ; S390X-NEXT: larl %r1, .LCPI64_3 -; S390X-NEXT: ld %f1, 0(%r1) +; S390X-NEXT: ldeb %f1, 0(%r1) ; S390X-NEXT: ldr %f10, %f0 ; S390X-NEXT: ldr %f0, %f1 ; S390X-NEXT: brasl %r14, log@PLT -; S390X-NEXT: ldr %f6, %f0 -; S390X-NEXT: ldr %f0, %f8 -; S390X-NEXT: ldr %f2, %f9 -; S390X-NEXT: ldr %f4, %f10 +; 
S390X-NEXT: ldr %f2, %f10 +; S390X-NEXT: ldr %f4, %f9 +; S390X-NEXT: ldr %f6, %f8 ; S390X-NEXT: ld %f8, 176(%r15) # 8-byte Folded Reload ; S390X-NEXT: ld %f9, 168(%r15) # 8-byte Folded Reload ; S390X-NEXT: ld %f10, 160(%r15) # 8-byte Folded Reload @@ -3610,15 +3586,14 @@ ; S390X-NEXT: std %f8, 160(%r15) # 8-byte Folded Spill ; S390X-NEXT: .cfi_offset %f8, -168 ; S390X-NEXT: larl %r1, .LCPI66_0 -; S390X-NEXT: ldeb %f0, 0(%r1) +; S390X-NEXT: ld %f0, 0(%r1) ; S390X-NEXT: brasl %r14, log10@PLT ; S390X-NEXT: larl %r1, .LCPI66_1 -; S390X-NEXT: ld %f1, 0(%r1) +; S390X-NEXT: ldeb %f1, 0(%r1) ; S390X-NEXT: ldr %f8, %f0 ; S390X-NEXT: ldr %f0, %f1 ; S390X-NEXT: brasl %r14, log10@PLT -; S390X-NEXT: ldr %f2, %f0 -; S390X-NEXT: ldr %f0, %f8 +; S390X-NEXT: ldr %f2, %f8 ; S390X-NEXT: ld %f8, 160(%r15) # 8-byte Folded Reload ; S390X-NEXT: lmg %r14, %r15, 280(%r15) ; S390X-NEXT: br %r14 @@ -3676,9 +3651,8 @@ ; S390X-NEXT: ler %f9, %f0 ; S390X-NEXT: ler %f0, %f1 ; S390X-NEXT: brasl %r14, log10f@PLT -; S390X-NEXT: ler %f4, %f0 -; S390X-NEXT: ler %f0, %f8 ; S390X-NEXT: ler %f2, %f9 +; S390X-NEXT: ler %f4, %f8 ; S390X-NEXT: ld %f8, 168(%r15) # 8-byte Folded Reload ; S390X-NEXT: ld %f9, 160(%r15) # 8-byte Folded Reload ; S390X-NEXT: lmg %r14, %r15, 288(%r15) @@ -3814,7 +3788,7 @@ ; S390X-NEXT: .cfi_offset %f9, -176 ; S390X-NEXT: .cfi_offset %f10, -184 ; S390X-NEXT: larl %r1, .LCPI69_0 -; S390X-NEXT: ldeb %f0, 0(%r1) +; S390X-NEXT: ld %f0, 0(%r1) ; S390X-NEXT: brasl %r14, log10@PLT ; S390X-NEXT: larl %r1, .LCPI69_1 ; S390X-NEXT: ld %f1, 0(%r1) @@ -3827,14 +3801,13 @@ ; S390X-NEXT: ldr %f0, %f1 ; S390X-NEXT: brasl %r14, log10@PLT ; S390X-NEXT: larl %r1, .LCPI69_3 -; S390X-NEXT: ld %f1, 0(%r1) +; S390X-NEXT: ldeb %f1, 0(%r1) ; S390X-NEXT: ldr %f10, %f0 ; S390X-NEXT: ldr %f0, %f1 ; S390X-NEXT: brasl %r14, log10@PLT -; S390X-NEXT: ldr %f6, %f0 -; S390X-NEXT: ldr %f0, %f8 -; S390X-NEXT: ldr %f2, %f9 -; S390X-NEXT: ldr %f4, %f10 +; S390X-NEXT: ldr %f2, %f10 +; S390X-NEXT: ldr %f4, %f9 +; 
S390X-NEXT: ldr %f6, %f8 ; S390X-NEXT: ld %f8, 176(%r15) # 8-byte Folded Reload ; S390X-NEXT: ld %f9, 168(%r15) # 8-byte Folded Reload ; S390X-NEXT: ld %f10, 160(%r15) # 8-byte Folded Reload @@ -3930,15 +3903,14 @@ ; S390X-NEXT: std %f8, 160(%r15) # 8-byte Folded Spill ; S390X-NEXT: .cfi_offset %f8, -168 ; S390X-NEXT: larl %r1, .LCPI71_0 -; S390X-NEXT: ldeb %f0, 0(%r1) +; S390X-NEXT: ld %f0, 0(%r1) ; S390X-NEXT: brasl %r14, log2@PLT ; S390X-NEXT: larl %r1, .LCPI71_1 -; S390X-NEXT: ld %f1, 0(%r1) +; S390X-NEXT: ldeb %f1, 0(%r1) ; S390X-NEXT: ldr %f8, %f0 ; S390X-NEXT: ldr %f0, %f1 ; S390X-NEXT: brasl %r14, log2@PLT -; S390X-NEXT: ldr %f2, %f0 -; S390X-NEXT: ldr %f0, %f8 +; S390X-NEXT: ldr %f2, %f8 ; S390X-NEXT: ld %f8, 160(%r15) # 8-byte Folded Reload ; S390X-NEXT: lmg %r14, %r15, 280(%r15) ; S390X-NEXT: br %r14 @@ -3996,9 +3968,8 @@ ; S390X-NEXT: ler %f9, %f0 ; S390X-NEXT: ler %f0, %f1 ; S390X-NEXT: brasl %r14, log2f@PLT -; S390X-NEXT: ler %f4, %f0 -; S390X-NEXT: ler %f0, %f8 ; S390X-NEXT: ler %f2, %f9 +; S390X-NEXT: ler %f4, %f8 ; S390X-NEXT: ld %f8, 168(%r15) # 8-byte Folded Reload ; S390X-NEXT: ld %f9, 160(%r15) # 8-byte Folded Reload ; S390X-NEXT: lmg %r14, %r15, 288(%r15) @@ -4134,7 +4105,7 @@ ; S390X-NEXT: .cfi_offset %f9, -176 ; S390X-NEXT: .cfi_offset %f10, -184 ; S390X-NEXT: larl %r1, .LCPI74_0 -; S390X-NEXT: ldeb %f0, 0(%r1) +; S390X-NEXT: ld %f0, 0(%r1) ; S390X-NEXT: brasl %r14, log2@PLT ; S390X-NEXT: larl %r1, .LCPI74_1 ; S390X-NEXT: ld %f1, 0(%r1) @@ -4147,14 +4118,13 @@ ; S390X-NEXT: ldr %f0, %f1 ; S390X-NEXT: brasl %r14, log2@PLT ; S390X-NEXT: larl %r1, .LCPI74_3 -; S390X-NEXT: ld %f1, 0(%r1) +; S390X-NEXT: ldeb %f1, 0(%r1) ; S390X-NEXT: ldr %f10, %f0 ; S390X-NEXT: ldr %f0, %f1 ; S390X-NEXT: brasl %r14, log2@PLT -; S390X-NEXT: ldr %f6, %f0 -; S390X-NEXT: ldr %f0, %f8 -; S390X-NEXT: ldr %f2, %f9 -; S390X-NEXT: ldr %f4, %f10 +; S390X-NEXT: ldr %f2, %f10 +; S390X-NEXT: ldr %f4, %f9 +; S390X-NEXT: ldr %f6, %f8 ; S390X-NEXT: ld %f8, 176(%r15) # 8-byte 
Folded Reload ; S390X-NEXT: ld %f9, 168(%r15) # 8-byte Folded Reload ; S390X-NEXT: ld %f10, 160(%r15) # 8-byte Folded Reload @@ -4396,15 +4366,14 @@ ; S390X-NEXT: std %f8, 160(%r15) # 8-byte Folded Spill ; S390X-NEXT: .cfi_offset %f8, -168 ; S390X-NEXT: larl %r1, .LCPI81_0 -; S390X-NEXT: ld %f0, 0(%r1) +; S390X-NEXT: ldeb %f0, 0(%r1) ; S390X-NEXT: brasl %r14, nearbyint@PLT ; S390X-NEXT: larl %r1, .LCPI81_1 -; S390X-NEXT: ldeb %f1, 0(%r1) +; S390X-NEXT: ld %f1, 0(%r1) ; S390X-NEXT: ldr %f8, %f0 ; S390X-NEXT: ldr %f0, %f1 ; S390X-NEXT: brasl %r14, nearbyint@PLT -; S390X-NEXT: ldr %f2, %f0 -; S390X-NEXT: ldr %f0, %f8 +; S390X-NEXT: ldr %f2, %f8 ; S390X-NEXT: ld %f8, 160(%r15) # 8-byte Folded Reload ; S390X-NEXT: lmg %r14, %r15, 280(%r15) ; S390X-NEXT: br %r14 @@ -4448,9 +4417,8 @@ ; S390X-NEXT: ler %f9, %f0 ; S390X-NEXT: ler %f0, %f1 ; S390X-NEXT: brasl %r14, nearbyintf@PLT -; S390X-NEXT: ler %f4, %f0 -; S390X-NEXT: ler %f0, %f8 ; S390X-NEXT: ler %f2, %f9 +; S390X-NEXT: ler %f4, %f8 ; S390X-NEXT: ld %f8, 168(%r15) # 8-byte Folded Reload ; S390X-NEXT: ld %f9, 160(%r15) # 8-byte Folded Reload ; S390X-NEXT: lmg %r14, %r15, 288(%r15) @@ -4565,10 +4533,9 @@ ; S390X-NEXT: ldr %f10, %f0 ; S390X-NEXT: ldr %f0, %f1 ; S390X-NEXT: brasl %r14, nearbyint@PLT -; S390X-NEXT: ldr %f6, %f0 -; S390X-NEXT: ldr %f0, %f8 -; S390X-NEXT: ldr %f2, %f9 -; S390X-NEXT: ldr %f4, %f10 +; S390X-NEXT: ldr %f2, %f10 +; S390X-NEXT: ldr %f4, %f9 +; S390X-NEXT: ldr %f6, %f8 ; S390X-NEXT: ld %f8, 176(%r15) # 8-byte Folded Reload ; S390X-NEXT: ld %f9, 168(%r15) # 8-byte Folded Reload ; S390X-NEXT: ld %f10, 160(%r15) # 8-byte Folded Reload @@ -4655,8 +4622,7 @@ ; S390X-NEXT: ldr %f8, %f0 ; S390X-NEXT: ldr %f0, %f1 ; S390X-NEXT: brasl %r14, fmax@PLT -; S390X-NEXT: ldr %f2, %f0 -; S390X-NEXT: ldr %f0, %f8 +; S390X-NEXT: ldr %f2, %f8 ; S390X-NEXT: ld %f8, 160(%r15) # 8-byte Folded Reload ; S390X-NEXT: lmg %r14, %r15, 280(%r15) ; S390X-NEXT: br %r14 @@ -4709,10 +4675,10 @@ ; S390X-NEXT: .cfi_offset %f9, -176 
; S390X-NEXT: .cfi_offset %f10, -184 ; S390X-NEXT: larl %r1, .LCPI87_0 -; S390X-NEXT: le %f8, 0(%r1) +; S390X-NEXT: le %f0, 0(%r1) ; S390X-NEXT: larl %r1, .LCPI87_1 -; S390X-NEXT: le %f2, 0(%r1) -; S390X-NEXT: ler %f0, %f8 +; S390X-NEXT: le %f8, 0(%r1) +; S390X-NEXT: ler %f2, %f8 ; S390X-NEXT: brasl %r14, fmaxf@PLT ; S390X-NEXT: larl %r1, .LCPI87_2 ; S390X-NEXT: le %f1, 0(%r1) @@ -4722,14 +4688,12 @@ ; S390X-NEXT: ler %f0, %f1 ; S390X-NEXT: brasl %r14, fmaxf@PLT ; S390X-NEXT: larl %r1, .LCPI87_4 -; S390X-NEXT: le %f1, 0(%r1) +; S390X-NEXT: le %f2, 0(%r1) ; S390X-NEXT: ler %f10, %f0 -; S390X-NEXT: ler %f0, %f1 -; S390X-NEXT: ler %f2, %f8 +; S390X-NEXT: ler %f0, %f8 ; S390X-NEXT: brasl %r14, fmaxf@PLT -; S390X-NEXT: ler %f4, %f0 -; S390X-NEXT: ler %f0, %f9 ; S390X-NEXT: ler %f2, %f10 +; S390X-NEXT: ler %f4, %f9 ; S390X-NEXT: ld %f8, 176(%r15) # 8-byte Folded Reload ; S390X-NEXT: ld %f9, 168(%r15) # 8-byte Folded Reload ; S390X-NEXT: ld %f10, 160(%r15) # 8-byte Folded Reload @@ -4914,10 +4878,9 @@ ; S390X-NEXT: ldr %f10, %f0 ; S390X-NEXT: ldr %f0, %f1 ; S390X-NEXT: brasl %r14, fmax@PLT -; S390X-NEXT: ldr %f6, %f0 -; S390X-NEXT: ldr %f0, %f8 -; S390X-NEXT: ldr %f2, %f9 -; S390X-NEXT: ldr %f4, %f10 +; S390X-NEXT: ldr %f2, %f10 +; S390X-NEXT: ldr %f4, %f9 +; S390X-NEXT: ldr %f6, %f8 ; S390X-NEXT: ld %f8, 176(%r15) # 8-byte Folded Reload ; S390X-NEXT: ld %f9, 168(%r15) # 8-byte Folded Reload ; S390X-NEXT: ld %f10, 160(%r15) # 8-byte Folded Reload @@ -5038,8 +5001,7 @@ ; S390X-NEXT: ldr %f8, %f0 ; S390X-NEXT: ldr %f0, %f1 ; S390X-NEXT: brasl %r14, fmin@PLT -; S390X-NEXT: ldr %f2, %f0 -; S390X-NEXT: ldr %f0, %f8 +; S390X-NEXT: ldr %f2, %f8 ; S390X-NEXT: ld %f8, 160(%r15) # 8-byte Folded Reload ; S390X-NEXT: lmg %r14, %r15, 280(%r15) ; S390X-NEXT: br %r14 @@ -5092,10 +5054,10 @@ ; S390X-NEXT: .cfi_offset %f9, -176 ; S390X-NEXT: .cfi_offset %f10, -184 ; S390X-NEXT: larl %r1, .LCPI92_0 -; S390X-NEXT: le %f8, 0(%r1) +; S390X-NEXT: le %f0, 0(%r1) ; S390X-NEXT: larl %r1, 
.LCPI92_1 -; S390X-NEXT: le %f2, 0(%r1) -; S390X-NEXT: ler %f0, %f8 +; S390X-NEXT: le %f8, 0(%r1) +; S390X-NEXT: ler %f2, %f8 ; S390X-NEXT: brasl %r14, fminf@PLT ; S390X-NEXT: larl %r1, .LCPI92_2 ; S390X-NEXT: le %f1, 0(%r1) @@ -5105,14 +5067,12 @@ ; S390X-NEXT: ler %f0, %f1 ; S390X-NEXT: brasl %r14, fminf@PLT ; S390X-NEXT: larl %r1, .LCPI92_4 -; S390X-NEXT: le %f1, 0(%r1) +; S390X-NEXT: le %f2, 0(%r1) ; S390X-NEXT: ler %f10, %f0 -; S390X-NEXT: ler %f0, %f1 -; S390X-NEXT: ler %f2, %f8 +; S390X-NEXT: ler %f0, %f8 ; S390X-NEXT: brasl %r14, fminf@PLT -; S390X-NEXT: ler %f4, %f0 -; S390X-NEXT: ler %f0, %f9 ; S390X-NEXT: ler %f2, %f10 +; S390X-NEXT: ler %f4, %f9 ; S390X-NEXT: ld %f8, 176(%r15) # 8-byte Folded Reload ; S390X-NEXT: ld %f9, 168(%r15) # 8-byte Folded Reload ; S390X-NEXT: ld %f10, 160(%r15) # 8-byte Folded Reload @@ -5301,10 +5261,9 @@ ; S390X-NEXT: ldr %f10, %f0 ; S390X-NEXT: ldr %f0, %f1 ; S390X-NEXT: brasl %r14, fmin@PLT -; S390X-NEXT: ldr %f6, %f0 -; S390X-NEXT: ldr %f0, %f8 -; S390X-NEXT: ldr %f2, %f9 -; S390X-NEXT: ldr %f4, %f10 +; S390X-NEXT: ldr %f2, %f10 +; S390X-NEXT: ldr %f4, %f9 +; S390X-NEXT: ldr %f6, %f8 ; S390X-NEXT: ld %f8, 176(%r15) # 8-byte Folded Reload ; S390X-NEXT: ld %f9, 168(%r15) # 8-byte Folded Reload ; S390X-NEXT: ld %f10, 160(%r15) # 8-byte Folded Reload @@ -5661,8 +5620,7 @@ ; S390X-NEXT: ldr %f8, %f0 ; S390X-NEXT: ldr %f0, %f1 ; S390X-NEXT: brasl %r14, ceil@PLT -; S390X-NEXT: ldr %f2, %f0 -; S390X-NEXT: ldr %f0, %f8 +; S390X-NEXT: ldr %f2, %f8 ; S390X-NEXT: ld %f8, 160(%r15) # 8-byte Folded Reload ; S390X-NEXT: lmg %r14, %r15, 280(%r15) ; S390X-NEXT: br %r14 @@ -5706,9 +5664,8 @@ ; S390X-NEXT: ler %f9, %f0 ; S390X-NEXT: ler %f0, %f1 ; S390X-NEXT: brasl %r14, ceilf@PLT -; S390X-NEXT: ler %f4, %f0 -; S390X-NEXT: ler %f0, %f8 ; S390X-NEXT: ler %f2, %f9 +; S390X-NEXT: ler %f4, %f8 ; S390X-NEXT: ld %f8, 168(%r15) # 8-byte Folded Reload ; S390X-NEXT: ld %f9, 160(%r15) # 8-byte Folded Reload ; S390X-NEXT: lmg %r14, %r15, 288(%r15) @@ 
-5837,8 +5794,7 @@ ; S390X-NEXT: ldr %f8, %f0 ; S390X-NEXT: ldr %f0, %f1 ; S390X-NEXT: brasl %r14, floor@PLT -; S390X-NEXT: ldr %f2, %f0 -; S390X-NEXT: ldr %f0, %f8 +; S390X-NEXT: ldr %f2, %f8 ; S390X-NEXT: ld %f8, 160(%r15) # 8-byte Folded Reload ; S390X-NEXT: lmg %r14, %r15, 280(%r15) ; S390X-NEXT: br %r14 @@ -5882,9 +5838,8 @@ ; S390X-NEXT: ler %f9, %f0 ; S390X-NEXT: ler %f0, %f1 ; S390X-NEXT: brasl %r14, floorf@PLT -; S390X-NEXT: ler %f4, %f0 -; S390X-NEXT: ler %f0, %f8 ; S390X-NEXT: ler %f2, %f9 +; S390X-NEXT: ler %f4, %f8 ; S390X-NEXT: ld %f8, 168(%r15) # 8-byte Folded Reload ; S390X-NEXT: ld %f9, 160(%r15) # 8-byte Folded Reload ; S390X-NEXT: lmg %r14, %r15, 288(%r15) @@ -6012,8 +5967,7 @@ ; S390X-NEXT: ldr %f8, %f0 ; S390X-NEXT: ldr %f0, %f1 ; S390X-NEXT: brasl %r14, round@PLT -; S390X-NEXT: ldr %f2, %f0 -; S390X-NEXT: ldr %f0, %f8 +; S390X-NEXT: ldr %f2, %f8 ; S390X-NEXT: ld %f8, 160(%r15) # 8-byte Folded Reload ; S390X-NEXT: lmg %r14, %r15, 280(%r15) ; S390X-NEXT: br %r14 @@ -6057,9 +6011,8 @@ ; S390X-NEXT: ler %f9, %f0 ; S390X-NEXT: ler %f0, %f1 ; S390X-NEXT: brasl %r14, roundf@PLT -; S390X-NEXT: ler %f4, %f0 -; S390X-NEXT: ler %f0, %f8 ; S390X-NEXT: ler %f2, %f9 +; S390X-NEXT: ler %f4, %f8 ; S390X-NEXT: ld %f8, 168(%r15) # 8-byte Folded Reload ; S390X-NEXT: ld %f9, 160(%r15) # 8-byte Folded Reload ; S390X-NEXT: lmg %r14, %r15, 288(%r15) @@ -6188,8 +6141,7 @@ ; S390X-NEXT: ldr %f8, %f0 ; S390X-NEXT: ldr %f0, %f1 ; S390X-NEXT: brasl %r14, trunc@PLT -; S390X-NEXT: ldr %f2, %f0 -; S390X-NEXT: ldr %f0, %f8 +; S390X-NEXT: ldr %f2, %f8 ; S390X-NEXT: ld %f8, 160(%r15) # 8-byte Folded Reload ; S390X-NEXT: lmg %r14, %r15, 280(%r15) ; S390X-NEXT: br %r14 @@ -6233,9 +6185,8 @@ ; S390X-NEXT: ler %f9, %f0 ; S390X-NEXT: ler %f0, %f1 ; S390X-NEXT: brasl %r14, truncf@PLT -; S390X-NEXT: ler %f4, %f0 -; S390X-NEXT: ler %f0, %f8 ; S390X-NEXT: ler %f2, %f9 +; S390X-NEXT: ler %f4, %f8 ; S390X-NEXT: ld %f8, 168(%r15) # 8-byte Folded Reload ; S390X-NEXT: ld %f9, 160(%r15) # 
8-byte Folded Reload ; S390X-NEXT: lmg %r14, %r15, 288(%r15) diff --git a/llvm/test/CodeGen/X86/fp-intrinsics-fma.ll b/llvm/test/CodeGen/X86/fp-intrinsics-fma.ll --- a/llvm/test/CodeGen/X86/fp-intrinsics-fma.ll +++ b/llvm/test/CodeGen/X86/fp-intrinsics-fma.ll @@ -8,10 +8,15 @@ define float @f17() #0 { ; NOFMA-LABEL: f17: ; NOFMA: # %bb.0: # %entry +; NOFMA-NEXT: pushq %rax +; NOFMA-NEXT: .cfi_def_cfa_offset 16 ; NOFMA-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero ; NOFMA-NEXT: movaps %xmm0, %xmm1 ; NOFMA-NEXT: movaps %xmm0, %xmm2 -; NOFMA-NEXT: jmp fmaf # TAILCALL +; NOFMA-NEXT: callq fmaf +; NOFMA-NEXT: popq %rax +; NOFMA-NEXT: .cfi_def_cfa_offset 8 +; NOFMA-NEXT: retq ; ; FMA-LABEL: f17: ; FMA: # %bb.0: # %entry @@ -33,10 +38,15 @@ define double @f18() #0 { ; NOFMA-LABEL: f18: ; NOFMA: # %bb.0: # %entry +; NOFMA-NEXT: pushq %rax +; NOFMA-NEXT: .cfi_def_cfa_offset 16 ; NOFMA-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero ; NOFMA-NEXT: movaps %xmm0, %xmm1 ; NOFMA-NEXT: movaps %xmm0, %xmm2 -; NOFMA-NEXT: jmp fma # TAILCALL +; NOFMA-NEXT: callq fma +; NOFMA-NEXT: popq %rax +; NOFMA-NEXT: .cfi_def_cfa_offset 8 +; NOFMA-NEXT: retq ; ; FMA-LABEL: f18: ; FMA: # %bb.0: # %entry diff --git a/llvm/test/CodeGen/X86/fp-intrinsics.ll b/llvm/test/CodeGen/X86/fp-intrinsics.ll --- a/llvm/test/CodeGen/X86/fp-intrinsics.ll +++ b/llvm/test/CodeGen/X86/fp-intrinsics.ll @@ -257,15 +257,25 @@ ; ; SSE-LABEL: f6: ; SSE: # %bb.0: # %entry +; SSE-NEXT: pushq %rax +; SSE-NEXT: .cfi_def_cfa_offset 16 ; SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero ; SSE-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero -; SSE-NEXT: jmp pow # TAILCALL +; SSE-NEXT: callq pow +; SSE-NEXT: popq %rax +; SSE-NEXT: .cfi_def_cfa_offset 8 +; SSE-NEXT: retq ; ; AVX-LABEL: f6: ; AVX: # %bb.0: # %entry +; AVX-NEXT: pushq %rax +; AVX-NEXT: .cfi_def_cfa_offset 16 ; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero ; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero -; AVX-NEXT: jmp pow # TAILCALL +; AVX-NEXT: callq pow +; AVX-NEXT: popq %rax +; 
AVX-NEXT: .cfi_def_cfa_offset 8 +; AVX-NEXT: retq entry: %result = call double @llvm.experimental.constrained.pow.f64(double 42.1, double 3.0, @@ -290,15 +300,25 @@ ; ; SSE-LABEL: f7: ; SSE: # %bb.0: # %entry +; SSE-NEXT: pushq %rax +; SSE-NEXT: .cfi_def_cfa_offset 16 ; SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero ; SSE-NEXT: movl $3, %edi -; SSE-NEXT: jmp __powidf2 # TAILCALL +; SSE-NEXT: callq __powidf2 +; SSE-NEXT: popq %rax +; SSE-NEXT: .cfi_def_cfa_offset 8 +; SSE-NEXT: retq ; ; AVX-LABEL: f7: ; AVX: # %bb.0: # %entry +; AVX-NEXT: pushq %rax +; AVX-NEXT: .cfi_def_cfa_offset 16 ; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero ; AVX-NEXT: movl $3, %edi -; AVX-NEXT: jmp __powidf2 # TAILCALL +; AVX-NEXT: callq __powidf2 +; AVX-NEXT: popq %rax +; AVX-NEXT: .cfi_def_cfa_offset 8 +; AVX-NEXT: retq entry: %result = call double @llvm.experimental.constrained.powi.f64(double 42.1, i32 3, @@ -322,13 +342,23 @@ ; ; SSE-LABEL: f8: ; SSE: # %bb.0: # %entry +; SSE-NEXT: pushq %rax +; SSE-NEXT: .cfi_def_cfa_offset 16 ; SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; SSE-NEXT: jmp sin # TAILCALL +; SSE-NEXT: callq sin +; SSE-NEXT: popq %rax +; SSE-NEXT: .cfi_def_cfa_offset 8 +; SSE-NEXT: retq ; ; AVX-LABEL: f8: ; AVX: # %bb.0: # %entry +; AVX-NEXT: pushq %rax +; AVX-NEXT: .cfi_def_cfa_offset 16 ; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero -; AVX-NEXT: jmp sin # TAILCALL +; AVX-NEXT: callq sin +; AVX-NEXT: popq %rax +; AVX-NEXT: .cfi_def_cfa_offset 8 +; AVX-NEXT: retq entry: %result = call double @llvm.experimental.constrained.sin.f64(double 42.0, metadata !"round.dynamic", @@ -351,13 +381,23 @@ ; ; SSE-LABEL: f9: ; SSE: # %bb.0: # %entry +; SSE-NEXT: pushq %rax +; SSE-NEXT: .cfi_def_cfa_offset 16 ; SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; SSE-NEXT: jmp cos # TAILCALL +; SSE-NEXT: callq cos +; SSE-NEXT: popq %rax +; SSE-NEXT: .cfi_def_cfa_offset 8 +; SSE-NEXT: retq ; ; AVX-LABEL: f9: ; AVX: # %bb.0: # %entry +; AVX-NEXT: pushq %rax +; AVX-NEXT: .cfi_def_cfa_offset 16 ; 
AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero -; AVX-NEXT: jmp cos # TAILCALL +; AVX-NEXT: callq cos +; AVX-NEXT: popq %rax +; AVX-NEXT: .cfi_def_cfa_offset 8 +; AVX-NEXT: retq entry: %result = call double @llvm.experimental.constrained.cos.f64(double 42.0, metadata !"round.dynamic", @@ -380,13 +420,23 @@ ; ; SSE-LABEL: f10: ; SSE: # %bb.0: # %entry +; SSE-NEXT: pushq %rax +; SSE-NEXT: .cfi_def_cfa_offset 16 ; SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; SSE-NEXT: jmp exp # TAILCALL +; SSE-NEXT: callq exp +; SSE-NEXT: popq %rax +; SSE-NEXT: .cfi_def_cfa_offset 8 +; SSE-NEXT: retq ; ; AVX-LABEL: f10: ; AVX: # %bb.0: # %entry +; AVX-NEXT: pushq %rax +; AVX-NEXT: .cfi_def_cfa_offset 16 ; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero -; AVX-NEXT: jmp exp # TAILCALL +; AVX-NEXT: callq exp +; AVX-NEXT: popq %rax +; AVX-NEXT: .cfi_def_cfa_offset 8 +; AVX-NEXT: retq entry: %result = call double @llvm.experimental.constrained.exp.f64(double 42.0, metadata !"round.dynamic", @@ -409,13 +459,23 @@ ; ; SSE-LABEL: f11: ; SSE: # %bb.0: # %entry +; SSE-NEXT: pushq %rax +; SSE-NEXT: .cfi_def_cfa_offset 16 ; SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; SSE-NEXT: jmp exp2 # TAILCALL +; SSE-NEXT: callq exp2 +; SSE-NEXT: popq %rax +; SSE-NEXT: .cfi_def_cfa_offset 8 +; SSE-NEXT: retq ; ; AVX-LABEL: f11: ; AVX: # %bb.0: # %entry +; AVX-NEXT: pushq %rax +; AVX-NEXT: .cfi_def_cfa_offset 16 ; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero -; AVX-NEXT: jmp exp2 # TAILCALL +; AVX-NEXT: callq exp2 +; AVX-NEXT: popq %rax +; AVX-NEXT: .cfi_def_cfa_offset 8 +; AVX-NEXT: retq entry: %result = call double @llvm.experimental.constrained.exp2.f64(double 42.1, metadata !"round.dynamic", @@ -438,13 +498,23 @@ ; ; SSE-LABEL: f12: ; SSE: # %bb.0: # %entry +; SSE-NEXT: pushq %rax +; SSE-NEXT: .cfi_def_cfa_offset 16 ; SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; SSE-NEXT: jmp log # TAILCALL +; SSE-NEXT: callq log +; SSE-NEXT: popq %rax +; SSE-NEXT: .cfi_def_cfa_offset 8 +; SSE-NEXT: retq ; ; AVX-LABEL: 
f12: ; AVX: # %bb.0: # %entry +; AVX-NEXT: pushq %rax +; AVX-NEXT: .cfi_def_cfa_offset 16 ; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero -; AVX-NEXT: jmp log # TAILCALL +; AVX-NEXT: callq log +; AVX-NEXT: popq %rax +; AVX-NEXT: .cfi_def_cfa_offset 8 +; AVX-NEXT: retq entry: %result = call double @llvm.experimental.constrained.log.f64(double 42.0, metadata !"round.dynamic", @@ -467,13 +537,23 @@ ; ; SSE-LABEL: f13: ; SSE: # %bb.0: # %entry +; SSE-NEXT: pushq %rax +; SSE-NEXT: .cfi_def_cfa_offset 16 ; SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; SSE-NEXT: jmp log10 # TAILCALL +; SSE-NEXT: callq log10 +; SSE-NEXT: popq %rax +; SSE-NEXT: .cfi_def_cfa_offset 8 +; SSE-NEXT: retq ; ; AVX-LABEL: f13: ; AVX: # %bb.0: # %entry +; AVX-NEXT: pushq %rax +; AVX-NEXT: .cfi_def_cfa_offset 16 ; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero -; AVX-NEXT: jmp log10 # TAILCALL +; AVX-NEXT: callq log10 +; AVX-NEXT: popq %rax +; AVX-NEXT: .cfi_def_cfa_offset 8 +; AVX-NEXT: retq entry: %result = call double @llvm.experimental.constrained.log10.f64(double 42.0, metadata !"round.dynamic", @@ -496,13 +576,23 @@ ; ; SSE-LABEL: f14: ; SSE: # %bb.0: # %entry +; SSE-NEXT: pushq %rax +; SSE-NEXT: .cfi_def_cfa_offset 16 ; SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; SSE-NEXT: jmp log2 # TAILCALL +; SSE-NEXT: callq log2 +; SSE-NEXT: popq %rax +; SSE-NEXT: .cfi_def_cfa_offset 8 +; SSE-NEXT: retq ; ; AVX-LABEL: f14: ; AVX: # %bb.0: # %entry +; AVX-NEXT: pushq %rax +; AVX-NEXT: .cfi_def_cfa_offset 16 ; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero -; AVX-NEXT: jmp log2 # TAILCALL +; AVX-NEXT: callq log2 +; AVX-NEXT: popq %rax +; AVX-NEXT: .cfi_def_cfa_offset 8 +; AVX-NEXT: retq entry: %result = call double @llvm.experimental.constrained.log2.f64(double 42.0, metadata !"round.dynamic", @@ -525,8 +615,13 @@ ; ; SSE-LABEL: f15: ; SSE: # %bb.0: # %entry +; SSE-NEXT: pushq %rax +; SSE-NEXT: .cfi_def_cfa_offset 16 ; SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; SSE-NEXT: jmp rint # TAILCALL +; SSE-NEXT: 
callq rint +; SSE-NEXT: popq %rax +; SSE-NEXT: .cfi_def_cfa_offset 8 +; SSE-NEXT: retq ; ; AVX-LABEL: f15: ; AVX: # %bb.0: # %entry @@ -556,8 +651,13 @@ ; ; SSE-LABEL: f16: ; SSE: # %bb.0: # %entry +; SSE-NEXT: pushq %rax +; SSE-NEXT: .cfi_def_cfa_offset 16 ; SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; SSE-NEXT: jmp nearbyint # TAILCALL +; SSE-NEXT: callq nearbyint +; SSE-NEXT: popq %rax +; SSE-NEXT: .cfi_def_cfa_offset 8 +; SSE-NEXT: retq ; ; AVX-LABEL: f16: ; AVX: # %bb.0: # %entry @@ -588,15 +688,25 @@ ; ; SSE-LABEL: f19: ; SSE: # %bb.0: # %entry +; SSE-NEXT: pushq %rax +; SSE-NEXT: .cfi_def_cfa_offset 16 ; SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero ; SSE-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero -; SSE-NEXT: jmp fmod # TAILCALL +; SSE-NEXT: callq fmod +; SSE-NEXT: popq %rax +; SSE-NEXT: .cfi_def_cfa_offset 8 +; SSE-NEXT: retq ; ; AVX-LABEL: f19: ; AVX: # %bb.0: # %entry +; AVX-NEXT: pushq %rax +; AVX-NEXT: .cfi_def_cfa_offset 16 ; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero ; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero -; AVX-NEXT: jmp fmod # TAILCALL +; AVX-NEXT: callq fmod +; AVX-NEXT: popq %rax +; AVX-NEXT: .cfi_def_cfa_offset 8 +; AVX-NEXT: retq entry: %rem = call double @llvm.experimental.constrained.frem.f64( double 1.000000e+00, @@ -768,9 +878,34 @@ } define i32 @f23(double %x) #0 { -; COMMON-LABEL: f23: -; COMMON: # %bb.0: # %entry -; COMMON-NEXT: jmp lrint # TAILCALL +; X86-SSE-LABEL: f23: +; X86-SSE: # %bb.0: # %entry +; X86-SSE-NEXT: subl $12, %esp +; X86-SSE-NEXT: .cfi_def_cfa_offset 16 +; X86-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; X86-SSE-NEXT: movsd %xmm0, (%esp) +; X86-SSE-NEXT: calll lrint +; X86-SSE-NEXT: addl $12, %esp +; X86-SSE-NEXT: .cfi_def_cfa_offset 4 +; X86-SSE-NEXT: retl +; +; SSE-LABEL: f23: +; SSE: # %bb.0: # %entry +; SSE-NEXT: pushq %rax +; SSE-NEXT: .cfi_def_cfa_offset 16 +; SSE-NEXT: callq lrint +; SSE-NEXT: popq %rcx +; SSE-NEXT: .cfi_def_cfa_offset 8 +; SSE-NEXT: retq +; +; AVX-LABEL: f23: +; AVX: # %bb.0: # %entry 
+; AVX-NEXT: pushq %rax +; AVX-NEXT: .cfi_def_cfa_offset 16 +; AVX-NEXT: callq lrint +; AVX-NEXT: popq %rcx +; AVX-NEXT: .cfi_def_cfa_offset 8 +; AVX-NEXT: retq entry: %result = call i32 @llvm.experimental.constrained.lrint.i32.f64(double %x, metadata !"round.dynamic", @@ -779,9 +914,34 @@ } define i32 @f24(float %x) #0 { -; COMMON-LABEL: f24: -; COMMON: # %bb.0: # %entry -; COMMON-NEXT: jmp lrintf # TAILCALL +; X86-SSE-LABEL: f24: +; X86-SSE: # %bb.0: # %entry +; X86-SSE-NEXT: subl $12, %esp +; X86-SSE-NEXT: .cfi_def_cfa_offset 16 +; X86-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X86-SSE-NEXT: movss %xmm0, (%esp) +; X86-SSE-NEXT: calll lrintf +; X86-SSE-NEXT: addl $12, %esp +; X86-SSE-NEXT: .cfi_def_cfa_offset 4 +; X86-SSE-NEXT: retl +; +; SSE-LABEL: f24: +; SSE: # %bb.0: # %entry +; SSE-NEXT: pushq %rax +; SSE-NEXT: .cfi_def_cfa_offset 16 +; SSE-NEXT: callq lrintf +; SSE-NEXT: popq %rcx +; SSE-NEXT: .cfi_def_cfa_offset 8 +; SSE-NEXT: retq +; +; AVX-LABEL: f24: +; AVX: # %bb.0: # %entry +; AVX-NEXT: pushq %rax +; AVX-NEXT: .cfi_def_cfa_offset 16 +; AVX-NEXT: callq lrintf +; AVX-NEXT: popq %rcx +; AVX-NEXT: .cfi_def_cfa_offset 8 +; AVX-NEXT: retq entry: %result = call i32 @llvm.experimental.constrained.lrint.i32.f32(float %x, metadata !"round.dynamic", @@ -803,11 +963,21 @@ ; ; SSE-LABEL: f25: ; SSE: # %bb.0: # %entry -; SSE-NEXT: jmp llrint # TAILCALL +; SSE-NEXT: pushq %rax +; SSE-NEXT: .cfi_def_cfa_offset 16 +; SSE-NEXT: callq llrint +; SSE-NEXT: popq %rcx +; SSE-NEXT: .cfi_def_cfa_offset 8 +; SSE-NEXT: retq ; ; AVX-LABEL: f25: ; AVX: # %bb.0: # %entry -; AVX-NEXT: jmp llrint # TAILCALL +; AVX-NEXT: pushq %rax +; AVX-NEXT: .cfi_def_cfa_offset 16 +; AVX-NEXT: callq llrint +; AVX-NEXT: popq %rcx +; AVX-NEXT: .cfi_def_cfa_offset 8 +; AVX-NEXT: retq entry: %result = call i64 @llvm.experimental.constrained.llrint.i64.f64(double %x, metadata !"round.dynamic", @@ -829,11 +999,21 @@ ; ; SSE-LABEL: f26: ; SSE: # %bb.0: # %entry -; SSE-NEXT: jmp llrintf # 
TAILCALL +; SSE-NEXT: pushq %rax +; SSE-NEXT: .cfi_def_cfa_offset 16 +; SSE-NEXT: callq llrintf +; SSE-NEXT: popq %rcx +; SSE-NEXT: .cfi_def_cfa_offset 8 +; SSE-NEXT: retq ; ; AVX-LABEL: f26: ; AVX: # %bb.0: # %entry -; AVX-NEXT: jmp llrintf # TAILCALL +; AVX-NEXT: pushq %rax +; AVX-NEXT: .cfi_def_cfa_offset 16 +; AVX-NEXT: callq llrintf +; AVX-NEXT: popq %rcx +; AVX-NEXT: .cfi_def_cfa_offset 8 +; AVX-NEXT: retq entry: %result = call i64 @llvm.experimental.constrained.llrint.i64.f32(float %x, metadata !"round.dynamic", @@ -842,9 +1022,34 @@ } define i32 @f27(double %x) #0 { -; COMMON-LABEL: f27: -; COMMON: # %bb.0: # %entry -; COMMON-NEXT: jmp lround # TAILCALL +; X86-SSE-LABEL: f27: +; X86-SSE: # %bb.0: # %entry +; X86-SSE-NEXT: subl $12, %esp +; X86-SSE-NEXT: .cfi_def_cfa_offset 16 +; X86-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; X86-SSE-NEXT: movsd %xmm0, (%esp) +; X86-SSE-NEXT: calll lround +; X86-SSE-NEXT: addl $12, %esp +; X86-SSE-NEXT: .cfi_def_cfa_offset 4 +; X86-SSE-NEXT: retl +; +; SSE-LABEL: f27: +; SSE: # %bb.0: # %entry +; SSE-NEXT: pushq %rax +; SSE-NEXT: .cfi_def_cfa_offset 16 +; SSE-NEXT: callq lround +; SSE-NEXT: popq %rcx +; SSE-NEXT: .cfi_def_cfa_offset 8 +; SSE-NEXT: retq +; +; AVX-LABEL: f27: +; AVX: # %bb.0: # %entry +; AVX-NEXT: pushq %rax +; AVX-NEXT: .cfi_def_cfa_offset 16 +; AVX-NEXT: callq lround +; AVX-NEXT: popq %rcx +; AVX-NEXT: .cfi_def_cfa_offset 8 +; AVX-NEXT: retq entry: %result = call i32 @llvm.experimental.constrained.lround.i32.f64(double %x, metadata !"fpexcept.strict") #0 @@ -852,9 +1057,34 @@ } define i32 @f28(float %x) #0 { -; COMMON-LABEL: f28: -; COMMON: # %bb.0: # %entry -; COMMON-NEXT: jmp lroundf # TAILCALL +; X86-SSE-LABEL: f28: +; X86-SSE: # %bb.0: # %entry +; X86-SSE-NEXT: subl $12, %esp +; X86-SSE-NEXT: .cfi_def_cfa_offset 16 +; X86-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X86-SSE-NEXT: movss %xmm0, (%esp) +; X86-SSE-NEXT: calll lroundf +; X86-SSE-NEXT: addl $12, %esp +; X86-SSE-NEXT: 
.cfi_def_cfa_offset 4 +; X86-SSE-NEXT: retl +; +; SSE-LABEL: f28: +; SSE: # %bb.0: # %entry +; SSE-NEXT: pushq %rax +; SSE-NEXT: .cfi_def_cfa_offset 16 +; SSE-NEXT: callq lroundf +; SSE-NEXT: popq %rcx +; SSE-NEXT: .cfi_def_cfa_offset 8 +; SSE-NEXT: retq +; +; AVX-LABEL: f28: +; AVX: # %bb.0: # %entry +; AVX-NEXT: pushq %rax +; AVX-NEXT: .cfi_def_cfa_offset 16 +; AVX-NEXT: callq lroundf +; AVX-NEXT: popq %rcx +; AVX-NEXT: .cfi_def_cfa_offset 8 +; AVX-NEXT: retq entry: %result = call i32 @llvm.experimental.constrained.lround.i32.f32(float %x, metadata !"fpexcept.strict") #0 @@ -875,11 +1105,21 @@ ; ; SSE-LABEL: f29: ; SSE: # %bb.0: # %entry -; SSE-NEXT: jmp llround # TAILCALL +; SSE-NEXT: pushq %rax +; SSE-NEXT: .cfi_def_cfa_offset 16 +; SSE-NEXT: callq llround +; SSE-NEXT: popq %rcx +; SSE-NEXT: .cfi_def_cfa_offset 8 +; SSE-NEXT: retq ; ; AVX-LABEL: f29: ; AVX: # %bb.0: # %entry -; AVX-NEXT: jmp llround # TAILCALL +; AVX-NEXT: pushq %rax +; AVX-NEXT: .cfi_def_cfa_offset 16 +; AVX-NEXT: callq llround +; AVX-NEXT: popq %rcx +; AVX-NEXT: .cfi_def_cfa_offset 8 +; AVX-NEXT: retq entry: %result = call i64 @llvm.experimental.constrained.llround.i64.f64(double %x, metadata !"fpexcept.strict") #0 @@ -900,11 +1140,21 @@ ; ; SSE-LABEL: f30: ; SSE: # %bb.0: # %entry -; SSE-NEXT: jmp llroundf # TAILCALL +; SSE-NEXT: pushq %rax +; SSE-NEXT: .cfi_def_cfa_offset 16 +; SSE-NEXT: callq llroundf +; SSE-NEXT: popq %rcx +; SSE-NEXT: .cfi_def_cfa_offset 8 +; SSE-NEXT: retq ; ; AVX-LABEL: f30: ; AVX: # %bb.0: # %entry -; AVX-NEXT: jmp llroundf # TAILCALL +; AVX-NEXT: pushq %rax +; AVX-NEXT: .cfi_def_cfa_offset 16 +; AVX-NEXT: callq llroundf +; AVX-NEXT: popq %rcx +; AVX-NEXT: .cfi_def_cfa_offset 8 +; AVX-NEXT: retq entry: %result = call i64 @llvm.experimental.constrained.llround.i64.f32(float %x, metadata !"fpexcept.strict") #0 diff --git a/llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll b/llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll --- 
a/llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll +++ b/llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll @@ -292,9 +292,9 @@ ; CHECK-NEXT: callq fmod ; CHECK-NEXT: movsd %xmm0, {{[0-9]+}}(%rsp) ; CHECK-NEXT: fldl {{[0-9]+}}(%rsp) -; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload +; CHECK-NEXT: movsd (%rsp), %xmm0 # 8-byte Reload ; CHECK-NEXT: # xmm0 = mem[0],zero -; CHECK-NEXT: movsd (%rsp), %xmm1 # 8-byte Reload +; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload ; CHECK-NEXT: # xmm1 = mem[0],zero ; CHECK-NEXT: addq $24, %rsp ; CHECK-NEXT: .cfi_def_cfa_offset 8 @@ -1102,9 +1102,9 @@ ; CHECK-NEXT: callq pow ; CHECK-NEXT: movsd %xmm0, {{[0-9]+}}(%rsp) ; CHECK-NEXT: fldl {{[0-9]+}}(%rsp) -; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload +; CHECK-NEXT: movsd (%rsp), %xmm0 # 8-byte Reload ; CHECK-NEXT: # xmm0 = mem[0],zero -; CHECK-NEXT: movsd (%rsp), %xmm1 # 8-byte Reload +; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload ; CHECK-NEXT: # xmm1 = mem[0],zero ; CHECK-NEXT: addq $24, %rsp ; CHECK-NEXT: .cfi_def_cfa_offset 8 @@ -1358,9 +1358,9 @@ ; CHECK-NEXT: callq __powidf2 ; CHECK-NEXT: movsd %xmm0, {{[0-9]+}}(%rsp) ; CHECK-NEXT: fldl {{[0-9]+}}(%rsp) -; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload +; CHECK-NEXT: movsd (%rsp), %xmm0 # 8-byte Reload ; CHECK-NEXT: # xmm0 = mem[0],zero -; CHECK-NEXT: movsd (%rsp), %xmm1 # 8-byte Reload +; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload ; CHECK-NEXT: # xmm1 = mem[0],zero ; CHECK-NEXT: addq $24, %rsp ; CHECK-NEXT: .cfi_def_cfa_offset 8 @@ -1595,9 +1595,9 @@ ; CHECK-NEXT: callq sin ; CHECK-NEXT: movsd %xmm0, {{[0-9]+}}(%rsp) ; CHECK-NEXT: fldl {{[0-9]+}}(%rsp) -; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload +; CHECK-NEXT: movsd (%rsp), %xmm0 # 8-byte Reload ; CHECK-NEXT: # xmm0 = mem[0],zero -; CHECK-NEXT: movsd (%rsp), %xmm1 # 8-byte Reload +; CHECK-NEXT: movsd 
{{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload ; CHECK-NEXT: # xmm1 = mem[0],zero ; CHECK-NEXT: addq $24, %rsp ; CHECK-NEXT: .cfi_def_cfa_offset 8 @@ -1819,9 +1819,9 @@ ; CHECK-NEXT: callq cos ; CHECK-NEXT: movsd %xmm0, {{[0-9]+}}(%rsp) ; CHECK-NEXT: fldl {{[0-9]+}}(%rsp) -; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload +; CHECK-NEXT: movsd (%rsp), %xmm0 # 8-byte Reload ; CHECK-NEXT: # xmm0 = mem[0],zero -; CHECK-NEXT: movsd (%rsp), %xmm1 # 8-byte Reload +; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload ; CHECK-NEXT: # xmm1 = mem[0],zero ; CHECK-NEXT: addq $24, %rsp ; CHECK-NEXT: .cfi_def_cfa_offset 8 @@ -2043,9 +2043,9 @@ ; CHECK-NEXT: callq exp ; CHECK-NEXT: movsd %xmm0, {{[0-9]+}}(%rsp) ; CHECK-NEXT: fldl {{[0-9]+}}(%rsp) -; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload +; CHECK-NEXT: movsd (%rsp), %xmm0 # 8-byte Reload ; CHECK-NEXT: # xmm0 = mem[0],zero -; CHECK-NEXT: movsd (%rsp), %xmm1 # 8-byte Reload +; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload ; CHECK-NEXT: # xmm1 = mem[0],zero ; CHECK-NEXT: addq $24, %rsp ; CHECK-NEXT: .cfi_def_cfa_offset 8 @@ -2267,9 +2267,9 @@ ; CHECK-NEXT: callq exp2 ; CHECK-NEXT: movsd %xmm0, {{[0-9]+}}(%rsp) ; CHECK-NEXT: fldl {{[0-9]+}}(%rsp) -; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload +; CHECK-NEXT: movsd (%rsp), %xmm0 # 8-byte Reload ; CHECK-NEXT: # xmm0 = mem[0],zero -; CHECK-NEXT: movsd (%rsp), %xmm1 # 8-byte Reload +; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload ; CHECK-NEXT: # xmm1 = mem[0],zero ; CHECK-NEXT: addq $24, %rsp ; CHECK-NEXT: .cfi_def_cfa_offset 8 @@ -2491,9 +2491,9 @@ ; CHECK-NEXT: callq log ; CHECK-NEXT: movsd %xmm0, {{[0-9]+}}(%rsp) ; CHECK-NEXT: fldl {{[0-9]+}}(%rsp) -; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload +; CHECK-NEXT: movsd (%rsp), %xmm0 # 8-byte Reload ; CHECK-NEXT: # xmm0 = mem[0],zero -; CHECK-NEXT: movsd (%rsp), %xmm1 # 8-byte Reload +; CHECK-NEXT: 
movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload ; CHECK-NEXT: # xmm1 = mem[0],zero ; CHECK-NEXT: addq $24, %rsp ; CHECK-NEXT: .cfi_def_cfa_offset 8 @@ -2715,9 +2715,9 @@ ; CHECK-NEXT: callq log10 ; CHECK-NEXT: movsd %xmm0, {{[0-9]+}}(%rsp) ; CHECK-NEXT: fldl {{[0-9]+}}(%rsp) -; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload +; CHECK-NEXT: movsd (%rsp), %xmm0 # 8-byte Reload ; CHECK-NEXT: # xmm0 = mem[0],zero -; CHECK-NEXT: movsd (%rsp), %xmm1 # 8-byte Reload +; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload ; CHECK-NEXT: # xmm1 = mem[0],zero ; CHECK-NEXT: addq $24, %rsp ; CHECK-NEXT: .cfi_def_cfa_offset 8 @@ -2939,9 +2939,9 @@ ; CHECK-NEXT: callq log2 ; CHECK-NEXT: movsd %xmm0, {{[0-9]+}}(%rsp) ; CHECK-NEXT: fldl {{[0-9]+}}(%rsp) -; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload +; CHECK-NEXT: movsd (%rsp), %xmm0 # 8-byte Reload ; CHECK-NEXT: # xmm0 = mem[0],zero -; CHECK-NEXT: movsd (%rsp), %xmm1 # 8-byte Reload +; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload ; CHECK-NEXT: # xmm1 = mem[0],zero ; CHECK-NEXT: addq $24, %rsp ; CHECK-NEXT: .cfi_def_cfa_offset 8 @@ -3141,9 +3141,9 @@ ; CHECK-NEXT: callq rint ; CHECK-NEXT: movsd %xmm0, {{[0-9]+}}(%rsp) ; CHECK-NEXT: fldl {{[0-9]+}}(%rsp) -; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload +; CHECK-NEXT: movsd (%rsp), %xmm0 # 8-byte Reload ; CHECK-NEXT: # xmm0 = mem[0],zero -; CHECK-NEXT: movsd (%rsp), %xmm1 # 8-byte Reload +; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload ; CHECK-NEXT: # xmm1 = mem[0],zero ; CHECK-NEXT: addq $24, %rsp ; CHECK-NEXT: .cfi_def_cfa_offset 8 @@ -3311,9 +3311,9 @@ ; CHECK-NEXT: callq nearbyint ; CHECK-NEXT: movsd %xmm0, {{[0-9]+}}(%rsp) ; CHECK-NEXT: fldl {{[0-9]+}}(%rsp) -; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload +; CHECK-NEXT: movsd (%rsp), %xmm0 # 8-byte Reload ; CHECK-NEXT: # xmm0 = mem[0],zero -; CHECK-NEXT: movsd (%rsp), %xmm1 # 8-byte Reload 
+; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload ; CHECK-NEXT: # xmm1 = mem[0],zero ; CHECK-NEXT: addq $24, %rsp ; CHECK-NEXT: .cfi_def_cfa_offset 8 @@ -3520,9 +3520,9 @@ ; CHECK-NEXT: callq fmax ; CHECK-NEXT: movsd %xmm0, {{[0-9]+}}(%rsp) ; CHECK-NEXT: fldl {{[0-9]+}}(%rsp) -; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload +; CHECK-NEXT: movsd (%rsp), %xmm0 # 8-byte Reload ; CHECK-NEXT: # xmm0 = mem[0],zero -; CHECK-NEXT: movsd (%rsp), %xmm1 # 8-byte Reload +; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload ; CHECK-NEXT: # xmm1 = mem[0],zero ; CHECK-NEXT: addq $24, %rsp ; CHECK-NEXT: .cfi_def_cfa_offset 8 @@ -3775,9 +3775,9 @@ ; CHECK-NEXT: callq fmin ; CHECK-NEXT: movsd %xmm0, {{[0-9]+}}(%rsp) ; CHECK-NEXT: fldl {{[0-9]+}}(%rsp) -; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload +; CHECK-NEXT: movsd (%rsp), %xmm0 # 8-byte Reload ; CHECK-NEXT: # xmm0 = mem[0],zero -; CHECK-NEXT: movsd (%rsp), %xmm1 # 8-byte Reload +; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload ; CHECK-NEXT: # xmm1 = mem[0],zero ; CHECK-NEXT: addq $24, %rsp ; CHECK-NEXT: .cfi_def_cfa_offset 8 @@ -5160,9 +5160,9 @@ ; CHECK-NEXT: callq ceil ; CHECK-NEXT: movsd %xmm0, {{[0-9]+}}(%rsp) ; CHECK-NEXT: fldl {{[0-9]+}}(%rsp) -; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload +; CHECK-NEXT: movsd (%rsp), %xmm0 # 8-byte Reload ; CHECK-NEXT: # xmm0 = mem[0],zero -; CHECK-NEXT: movsd (%rsp), %xmm1 # 8-byte Reload +; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload ; CHECK-NEXT: # xmm1 = mem[0],zero ; CHECK-NEXT: addq $24, %rsp ; CHECK-NEXT: .cfi_def_cfa_offset 8 @@ -5292,9 +5292,9 @@ ; CHECK-NEXT: callq floor ; CHECK-NEXT: movsd %xmm0, {{[0-9]+}}(%rsp) ; CHECK-NEXT: fldl {{[0-9]+}}(%rsp) -; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload +; CHECK-NEXT: movsd (%rsp), %xmm0 # 8-byte Reload ; CHECK-NEXT: # xmm0 = mem[0],zero -; CHECK-NEXT: movsd (%rsp), %xmm1 # 
8-byte Reload +; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload ; CHECK-NEXT: # xmm1 = mem[0],zero ; CHECK-NEXT: addq $24, %rsp ; CHECK-NEXT: .cfi_def_cfa_offset 8 @@ -5446,9 +5446,9 @@ ; CHECK-NEXT: callq round ; CHECK-NEXT: movsd %xmm0, {{[0-9]+}}(%rsp) ; CHECK-NEXT: fldl {{[0-9]+}}(%rsp) -; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload +; CHECK-NEXT: movsd (%rsp), %xmm0 # 8-byte Reload ; CHECK-NEXT: # xmm0 = mem[0],zero -; CHECK-NEXT: movsd (%rsp), %xmm1 # 8-byte Reload +; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload ; CHECK-NEXT: # xmm1 = mem[0],zero ; CHECK-NEXT: addq $24, %rsp ; CHECK-NEXT: .cfi_def_cfa_offset 8 @@ -5590,9 +5590,9 @@ ; CHECK-NEXT: callq trunc ; CHECK-NEXT: movsd %xmm0, {{[0-9]+}}(%rsp) ; CHECK-NEXT: fldl {{[0-9]+}}(%rsp) -; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload +; CHECK-NEXT: movsd (%rsp), %xmm0 # 8-byte Reload ; CHECK-NEXT: # xmm0 = mem[0],zero -; CHECK-NEXT: movsd (%rsp), %xmm1 # 8-byte Reload +; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload ; CHECK-NEXT: # xmm1 = mem[0],zero ; CHECK-NEXT: addq $24, %rsp ; CHECK-NEXT: .cfi_def_cfa_offset 8