diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -14520,6 +14520,44 @@ When specified with the fast-math-flag 'afn', the result may be approximated using a less accurate calculation. + +'``llvm.tan.*``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +This is an overloaded intrinsic. You can use ``llvm.tan`` on any +floating-point or vector of floating-point type. Not all targets support +all types however. + +:: + + declare float @llvm.tan.f32(float %Val) + declare double @llvm.tan.f64(double %Val) + declare x86_fp80 @llvm.tan.f80(x86_fp80 %Val) + declare fp128 @llvm.tan.f128(fp128 %Val) + declare ppc_fp128 @llvm.tan.ppcf128(ppc_fp128 %Val) + +Overview: +""""""""" + +The '``llvm.tan.*``' intrinsics return the tangent of the operand. + +Arguments: +"""""""""" + +The argument and return value are floating-point numbers of the same type. + +Semantics: +"""""""""" + +Return the same value as a corresponding libm '``tan``' function but without +trapping or setting ``errno``. + +When specified with the fast-math-flag 'afn', the result may be approximated +using a less accurate calculation. + '``llvm.pow.*``' Intrinsic ^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/llvm/include/llvm/Analysis/VecFuncs.def b/llvm/include/llvm/Analysis/VecFuncs.def --- a/llvm/include/llvm/Analysis/VecFuncs.def +++ b/llvm/include/llvm/Analysis/VecFuncs.def @@ -52,6 +52,7 @@ TLI_DEFINE_VECFUNC("cosf", "vcosf", FIXED(4)) TLI_DEFINE_VECFUNC("llvm.cos.f32", "vcosf", FIXED(4)) TLI_DEFINE_VECFUNC("tanf", "vtanf", FIXED(4)) +TLI_DEFINE_VECFUNC("llvm.tan.f32", "vtanf", FIXED(4)) TLI_DEFINE_VECFUNC("asinf", "vasinf", FIXED(4)) TLI_DEFINE_VECFUNC("acosf", "vacosf", FIXED(4)) TLI_DEFINE_VECFUNC("atanf", "vatanf", FIXED(4)) diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h --- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h +++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h @@ -1774,6 +1774,9 @@ case Intrinsic::cos: ISD = ISD::FCOS; break; + case Intrinsic::tan: + ISD = ISD::FTAN; + break; case Intrinsic::exp: ISD = ISD::FEXP; break; diff --git a/llvm/include/llvm/CodeGen/ISDOpcodes.h b/llvm/include/llvm/CodeGen/ISDOpcodes.h --- a/llvm/include/llvm/CodeGen/ISDOpcodes.h +++ b/llvm/include/llvm/CodeGen/ISDOpcodes.h @@ -413,6 +413,7 @@ STRICT_FPOWI, STRICT_FSIN, STRICT_FCOS, + STRICT_FTAN, STRICT_FEXP, STRICT_FEXP2, STRICT_FLOG, @@ -926,6 +927,7 @@ FCBRT, FSIN, FCOS, + FTAN, FPOWI, FPOW, FLOG, diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td --- a/llvm/include/llvm/IR/Intrinsics.td +++ b/llvm/include/llvm/IR/Intrinsics.td @@ -697,6 +697,7 @@ def int_powi : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, llvm_anyint_ty]>; def int_sin : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>; def int_cos : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>; + def int_tan : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>; def int_pow : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, LLVMMatchType<0>]>; def int_log : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>; diff --git a/llvm/include/llvm/IR/RuntimeLibcalls.def b/llvm/include/llvm/IR/RuntimeLibcalls.def --- a/llvm/include/llvm/IR/RuntimeLibcalls.def +++ b/llvm/include/llvm/IR/RuntimeLibcalls.def @@ -192,6 +192,11 @@ HANDLE_LIBCALL(COS_F80, "cosl") HANDLE_LIBCALL(COS_F128, "cosl") HANDLE_LIBCALL(COS_PPCF128, "cosl") +HANDLE_LIBCALL(TAN_F32, "tanf") +HANDLE_LIBCALL(TAN_F64, "tan") +HANDLE_LIBCALL(TAN_F80, "tanl") +HANDLE_LIBCALL(TAN_F128, "tanl") +HANDLE_LIBCALL(TAN_PPCF128, "tanl") HANDLE_LIBCALL(SINCOS_F32, nullptr) HANDLE_LIBCALL(SINCOS_F64, nullptr) HANDLE_LIBCALL(SINCOS_F80, nullptr) diff --git a/llvm/include/llvm/Target/TargetSelectionDAG.td b/llvm/include/llvm/Target/TargetSelectionDAG.td --- a/llvm/include/llvm/Target/TargetSelectionDAG.td +++ b/llvm/include/llvm/Target/TargetSelectionDAG.td @@ -496,6 +496,7 @@ def fsqrt : SDNode<"ISD::FSQRT" , SDTFPUnaryOp>; def fsin : SDNode<"ISD::FSIN" , SDTFPUnaryOp>; def fcos : SDNode<"ISD::FCOS" , SDTFPUnaryOp>; +def ftan : SDNode<"ISD::FTAN" , SDTFPUnaryOp>; def fexp2 : SDNode<"ISD::FEXP2" , SDTFPUnaryOp>; def fpow : SDNode<"ISD::FPOW" , SDTFPBinOp>; def flog2 : SDNode<"ISD::FLOG2" , SDTFPUnaryOp>; @@ -545,6 +546,8 @@ SDTFPUnaryOp, [SDNPHasChain]>; def strict_fcos : SDNode<"ISD::STRICT_FCOS", SDTFPUnaryOp, [SDNPHasChain]>; +def strict_ftan : SDNode<"ISD::STRICT_FTAN", + SDTFPUnaryOp, [SDNPHasChain]>; def strict_fexp2 : SDNode<"ISD::STRICT_FEXP2", SDTFPUnaryOp, [SDNPHasChain]>; def strict_fpow : SDNode<"ISD::STRICT_FPOW", diff --git a/llvm/lib/Analysis/VectorUtils.cpp b/llvm/lib/Analysis/VectorUtils.cpp --- a/llvm/lib/Analysis/VectorUtils.cpp +++ b/llvm/lib/Analysis/VectorUtils.cpp @@ -66,6 +66,7 @@ case Intrinsic::sqrt: // Begin floating-point. case Intrinsic::sin: case Intrinsic::cos: + case Intrinsic::tan: case Intrinsic::exp: case Intrinsic::exp2: case Intrinsic::log: diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -4060,6 +4060,12 @@ // Expand into sincos libcall. ExpandSinCosLibCall(Node, Results); break; + case ISD::FTAN: + case ISD::STRICT_FTAN: + ExpandFPLibCall(Node, RTLIB::TAN_F32, RTLIB::TAN_F64, + RTLIB::TAN_F80, RTLIB::TAN_F128, + RTLIB::TAN_PPCF128, Results); + break; case ISD::FLOG: case ISD::STRICT_FLOG: ExpandFPLibCall(Node, RTLIB::LOG_F32, RTLIB::LOG_F64, RTLIB::LOG_F80, @@ -4876,6 +4882,7 @@ case ISD::FSQRT: case ISD::FSIN: case ISD::FCOS: + case ISD::FTAN: case ISD::FLOG: case ISD::FLOG2: case ISD::FLOG10: @@ -4898,6 +4905,7 @@ case ISD::STRICT_FSQRT: case ISD::STRICT_FSIN: case ISD::STRICT_FCOS: + case ISD::STRICT_FTAN: case ISD::STRICT_FLOG: case ISD::STRICT_FLOG2: case ISD::STRICT_FLOG10: diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp @@ -121,6 +121,8 @@ case ISD::FROUNDEVEN: R = SoftenFloatRes_FROUNDEVEN(N); break; case ISD::STRICT_FSIN: case ISD::FSIN: R = SoftenFloatRes_FSIN(N); break; + case ISD::STRICT_FTAN: + case ISD::FTAN: R = SoftenFloatRes_FTAN(N); break; case ISD::STRICT_FSQRT: case ISD::FSQRT: R = SoftenFloatRes_FSQRT(N); break; case ISD::STRICT_FSUB: @@ -378,6 +380,15 @@ RTLIB::COS_PPCF128)); } +SDValue DAGTypeLegalizer::SoftenFloatRes_FTAN(SDNode *N) { + return SoftenFloatRes_Unary(N, GetFPLibCall(N->getValueType(0), + RTLIB::TAN_F32, + RTLIB::TAN_F64, + RTLIB::TAN_F80, + RTLIB::TAN_F128, + RTLIB::TAN_PPCF128)); +} + SDValue DAGTypeLegalizer::SoftenFloatRes_FDIV(SDNode *N) { return SoftenFloatRes_Binary(N, GetFPLibCall(N->getValueType(0), RTLIB::DIV_F32, @@ -1262,6 +1273,8 @@ case ISD::FROUNDEVEN: ExpandFloatRes_FROUNDEVEN(N, Lo, Hi); break; case ISD::STRICT_FSIN: case ISD::FSIN: ExpandFloatRes_FSIN(N, Lo, Hi); break; + case ISD::STRICT_FTAN: + case ISD::FTAN: ExpandFloatRes_FTAN(N, Lo, Hi); break; case ISD::STRICT_FSQRT: case ISD::FSQRT: ExpandFloatRes_FSQRT(N, Lo, Hi); break; case ISD::STRICT_FSUB: @@ -1603,6 +1616,14 @@ RTLIB::SIN_PPCF128), Lo, Hi); } +void DAGTypeLegalizer::ExpandFloatRes_FTAN(SDNode *N, + SDValue &Lo, SDValue &Hi) { + ExpandFloatRes_Unary(N, GetFPLibCall(N->getValueType(0), + RTLIB::TAN_F32, RTLIB::TAN_F64, + RTLIB::TAN_F80, RTLIB::TAN_F128, + RTLIB::TAN_PPCF128), Lo, Hi); +} + void DAGTypeLegalizer::ExpandFloatRes_FSQRT(SDNode *N, SDValue &Lo, SDValue &Hi) { ExpandFloatRes_Unary(N, GetFPLibCall(N->getValueType(0), @@ -2271,6 +2292,7 @@ case ISD::FROUNDEVEN: case ISD::FSIN: case ISD::FSQRT: + case ISD::FTAN: case ISD::FTRUNC: case ISD::FCANONICALIZE: R = PromoteFloatRes_UnaryOp(N); break; @@ -2638,6 +2660,7 @@ case ISD::FSIN: case ISD::FSQRT: case ISD::FTRUNC: + case ISD::FTAN: case ISD::FCANONICALIZE: R = SoftPromoteHalfRes_UnaryOp(N); break; // Binary FP Operations diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -547,6 +547,7 @@ SDValue SoftenFloatRes_FCEIL(SDNode *N); SDValue SoftenFloatRes_FCOPYSIGN(SDNode *N); SDValue SoftenFloatRes_FCOS(SDNode *N); + SDValue SoftenFloatRes_FTAN(SDNode *N); SDValue SoftenFloatRes_FDIV(SDNode *N); SDValue SoftenFloatRes_FEXP(SDNode *N); SDValue SoftenFloatRes_FEXP2(SDNode *N); @@ -646,6 +647,7 @@ void ExpandFloatRes_FROUND (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandFloatRes_FROUNDEVEN(SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandFloatRes_FSIN (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FTAN (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandFloatRes_FSQRT (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandFloatRes_FSUB (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandFloatRes_FTRUNC (SDNode *N, SDValue &Lo, SDValue &Hi); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @@ -368,6 +368,7 @@ case ISD::FSQRT: case ISD::FSIN: case ISD::FCOS: + case ISD::FTAN: case ISD::FPOWI: case ISD::FPOW: case ISD::FLOG: diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -103,6 +103,7 @@ case ISD::FROUND: case ISD::FROUNDEVEN: case ISD::FSIN: + case ISD::FTAN: case ISD::FSQRT: case ISD::FTRUNC: case ISD::SIGN_EXTEND: @@ -1063,6 +1064,7 @@ case ISD::FROUNDEVEN: case ISD::VP_FROUNDEVEN: case ISD::FSIN: + case ISD::FTAN: case ISD::FSQRT: case ISD::VP_SQRT: case ISD::FTRUNC: case ISD::VP_FROUNDTOZERO: @@ -4124,6 +4126,7 @@ case ISD::FROUND: case ISD::FROUNDEVEN: case ISD::FSIN: + case ISD::FTAN: case ISD::FSQRT: case ISD::FTRUNC: if (unrollExpandedOp()) diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -4868,6 +4868,7 @@ case ISD::FREM: case ISD::FSIN: case ISD::FCOS: + case ISD::FTAN: case ISD::FMA: case ISD::FMAD: { if (SNaN) diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -6340,6 +6340,7 @@ case Intrinsic::fabs: case Intrinsic::sin: case Intrinsic::cos: + case Intrinsic::tan: case Intrinsic::floor: case Intrinsic::ceil: case Intrinsic::trunc: @@ -6355,6 +6356,7 @@ case Intrinsic::fabs: Opcode = ISD::FABS; break; case Intrinsic::sin: Opcode = ISD::FSIN; break; case Intrinsic::cos: Opcode = ISD::FCOS; break; + case Intrinsic::tan: Opcode = ISD::FTAN; break; case Intrinsic::floor: Opcode = ISD::FFLOOR; break; case Intrinsic::ceil: Opcode = ISD::FCEIL; break; case Intrinsic::trunc: Opcode = ISD::FTRUNC; break; @@ -8454,6 +8456,12 @@ if (visitUnaryFloatCall(I, ISD::FCOS)) return; break; + case LibFunc_tan: + case LibFunc_tanf: + case LibFunc_tanl: + if (visitUnaryFloatCall(I, ISD::FTAN)) + return; + break; case LibFunc_sqrt: case LibFunc_sqrtf: case LibFunc_sqrtl: diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp @@ -204,6 +204,8 @@ case ISD::FCOS: return "fcos"; case ISD::STRICT_FCOS: return "strict_fcos"; case ISD::FSINCOS: return "fsincos"; + case ISD::FTAN: return "ftan"; + case ISD::STRICT_FTAN: return "strict_ftan"; case ISD::FTRUNC: return "ftrunc"; case ISD::STRICT_FTRUNC: return "strict_ftrunc"; case ISD::FFLOOR: return "ffloor"; diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp --- a/llvm/lib/CodeGen/TargetLoweringBase.cpp +++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp @@ -898,7 +898,7 @@ setOperationAction({ISD::FCBRT, ISD::FLOG, ISD::FLOG2, ISD::FLOG10, ISD::FEXP, ISD::FEXP2, ISD::FFLOOR, ISD::FNEARBYINT, ISD::FCEIL, ISD::FRINT, ISD::FTRUNC, ISD::LROUND, ISD::LLROUND, - ISD::LRINT, ISD::LLRINT}, + ISD::LRINT, ISD::LLRINT, ISD::FTAN}, {MVT::f32, MVT::f64, MVT::f128}, Expand); // Default ISD::TRAP to expand (which turns it into abort). diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -592,6 +592,7 @@ setOperationAction(ISD::FSIN, VT, Action); setOperationAction(ISD::FCOS, VT, Action); setOperationAction(ISD::FSINCOS, VT, Action); + setOperationAction(ISD::FTAN, VT, Action); setOperationAction(ISD::FSQRT, VT, Action); setOperationAction(ISD::FPOW, VT, Action); setOperationAction(ISD::FLOG, VT, Action); @@ -646,6 +647,7 @@ setOperationAction(ISD::FSIN , VT, Expand); setOperationAction(ISD::FCOS , VT, Expand); setOperationAction(ISD::FSINCOS, VT, Expand); + setOperationAction(ISD::FTAN , VT, Expand); } // Half type will be promoted by default. @@ -720,12 +722,14 @@ setOperationAction(ISD::FSIN , MVT::f32, Expand); setOperationAction(ISD::FCOS , MVT::f32, Expand); setOperationAction(ISD::FSINCOS, MVT::f32, Expand); + setOperationAction(ISD::FTAN , MVT::f32, Expand); if (UseX87) { // Always expand sin/cos functions even though x87 has an instruction. setOperationAction(ISD::FSIN, MVT::f64, Expand); setOperationAction(ISD::FCOS, MVT::f64, Expand); setOperationAction(ISD::FSINCOS, MVT::f64, Expand); + setOperationAction(ISD::FTAN, MVT::f64, Expand); } } else if (UseX87) { // f32 and f64 in x87. @@ -741,6 +745,7 @@ setOperationAction(ISD::FSIN , VT, Expand); setOperationAction(ISD::FCOS , VT, Expand); setOperationAction(ISD::FSINCOS, VT, Expand); + setOperationAction(ISD::FTAN , VT, Expand); } } @@ -810,6 +815,7 @@ setOperationAction(ISD::FSIN , MVT::f80, Expand); setOperationAction(ISD::FCOS , MVT::f80, Expand); setOperationAction(ISD::FSINCOS, MVT::f80, Expand); + setOperationAction(ISD::FTAN , MVT::f80, Expand); setOperationAction(ISD::FFLOOR, MVT::f80, Expand); setOperationAction(ISD::FCEIL, MVT::f80, Expand); @@ -866,6 +872,8 @@ setOperationAction(ISD::FCOS, MVT::f128, LibCall); setOperationAction(ISD::STRICT_FCOS, MVT::f128, LibCall); setOperationAction(ISD::FSINCOS, MVT::f128, LibCall); + setOperationAction(ISD::FTAN, MVT::f128, LibCall); + setOperationAction(ISD::STRICT_FTAN, MVT::f128, LibCall); // No STRICT_FSINCOS setOperationAction(ISD::FSQRT, MVT::f128, LibCall); setOperationAction(ISD::STRICT_FSQRT, MVT::f128, LibCall); @@ -919,6 +927,7 @@ setOperationAction(ISD::FSIN, VT, Expand); setOperationAction(ISD::FSINCOS, VT, Expand); setOperationAction(ISD::FCOS, VT, Expand); + setOperationAction(ISD::FTAN, VT, Expand); setOperationAction(ISD::FREM, VT, Expand); setOperationAction(ISD::FCOPYSIGN, VT, Expand); setOperationAction(ISD::FPOW, VT, Expand); @@ -2387,7 +2396,8 @@ ISD::FLOG, ISD::STRICT_FLOG, ISD::FLOG10, ISD::STRICT_FLOG10, ISD::FPOW, ISD::STRICT_FPOW, - ISD::FSIN, ISD::STRICT_FSIN}) + ISD::FSIN, ISD::STRICT_FSIN, + ISD::FTAN, ISD::STRICT_FTAN}) if (isOperationExpand(Op, MVT::f32)) setOperationAction(Op, MVT::f32, Promote); diff --git a/llvm/test/CodeGen/X86/llvm.tan.ll b/llvm/test/CodeGen/X86/llvm.tan.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/X86/llvm.tan.ll @@ -0,0 +1,61 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 +; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu | FileCheck %s + +define half @use_tanf16(half %a) { +; CHECK-LABEL: use_tanf16: +; CHECK: # %bb.0: +; CHECK-NEXT: pushq %rax +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: callq __extendhfsf2@PLT +; CHECK-NEXT: callq tanf@PLT +; CHECK-NEXT: callq __truncsfhf2@PLT +; CHECK-NEXT: popq %rax +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: retq + %x = call half @llvm.tan.f16(half %a) + ret half %x +} + +define float @use_tanf32(float %a) { +; CHECK-LABEL: use_tanf32: +; CHECK: # %bb.0: +; CHECK-NEXT: jmp tanf@PLT # TAILCALL + %x = call float @llvm.tan.f32(float %a) + ret float %x +} + +define double @use_tanf64(double %a) { +; CHECK-LABEL: use_tanf64: +; CHECK: # %bb.0: +; CHECK-NEXT: jmp tan@PLT # TAILCALL + %x = call double @llvm.tan.f64(double %a) + ret double %x +} + +define fp128 @use_tanfp128(fp128 %a) { +; CHECK-LABEL: use_tanfp128: +; CHECK: # %bb.0: +; CHECK-NEXT: jmp tanl@PLT # TAILCALL + %x = call fp128 @llvm.tan.f128(fp128 %a) + ret fp128 %x +} + +define ppc_fp128 @use_tanppc_fp128(ppc_fp128 %a) { +; CHECK-LABEL: use_tanppc_fp128: +; CHECK: # %bb.0: +; CHECK-NEXT: pushq %rax +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: callq tanl@PLT +; CHECK-NEXT: popq %rax +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: retq + %x = call ppc_fp128 @llvm.tan.ppcf128(ppc_fp128 %a) + ret ppc_fp128 %x +} + +declare half @llvm.tan.f16(half) +declare float @llvm.tan.f32(float) +declare double @llvm.tan.f64(double) +declare fp128 @llvm.tan.f128(fp128) +declare ppc_fp128 @llvm.tan.ppcf128(ppc_fp128) + diff --git a/llvm/test/CodeGen/X86/vec-libcalls.ll b/llvm/test/CodeGen/X86/vec-libcalls.ll --- a/llvm/test/CodeGen/X86/vec-libcalls.ll +++ b/llvm/test/CodeGen/X86/vec-libcalls.ll @@ -17,6 +17,14 @@ declare <6 x float> @llvm.sin.v6f32(<6 x float>) declare <3 x double> @llvm.sin.v3f64(<3 x double>) +declare <1 x float> @llvm.tan.v1f32(<1 x float>) +declare <2 x float> @llvm.tan.v2f32(<2 x float>) +declare <3 x float> @llvm.tan.v3f32(<3 x float>) +declare <4 x float> @llvm.tan.v4f32(<4 x float>) +declare <5 x float> @llvm.tan.v5f32(<5 x float>) +declare <6 x float> @llvm.tan.v6f32(<6 x float>) +declare <3 x double> @llvm.tan.v3f64(<3 x double>) + ; Verify that all of the potential libcall candidates are handled. ; Some of these have custom lowering, so those cases won't have ; libcalls. @@ -230,6 +238,199 @@ ret <3 x double> %r } +define <1 x float> @tan_v1f32(<1 x float> %x) nounwind { +; CHECK-LABEL: tan_v1f32: +; CHECK: # %bb.0: +; CHECK-NEXT: pushq %rax +; CHECK-NEXT: callq tanf@PLT +; CHECK-NEXT: popq %rax +; CHECK-NEXT: retq + %r = call <1 x float> @llvm.tan.v1f32(<1 x float> %x) + ret <1 x float> %r +} + +define <2 x float> @tan_v2f32(<2 x float> %x) nounwind { +; CHECK-LABEL: tan_v2f32: +; CHECK: # %bb.0: +; CHECK-NEXT: subq $40, %rsp +; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill +; CHECK-NEXT: callq tanf@PLT +; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; CHECK-NEXT: vmovshdup (%rsp), %xmm0 # 16-byte Folded Reload +; CHECK-NEXT: # xmm0 = mem[1,1,3,3] +; CHECK-NEXT: callq tanf@PLT +; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload +; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3] +; CHECK-NEXT: addq $40, %rsp +; CHECK-NEXT: retq + %r = call <2 x float> @llvm.tan.v2f32(<2 x float> %x) + ret <2 x float> %r +} + +define <3 x float> @tan_v3f32(<3 x float> %x) nounwind { +; CHECK-LABEL: tan_v3f32: +; CHECK: # %bb.0: +; CHECK-NEXT: subq $40, %rsp +; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; CHECK-NEXT: callq tanf@PLT +; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill +; CHECK-NEXT: vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload +; CHECK-NEXT: # xmm0 = mem[1,1,3,3] +; CHECK-NEXT: callq tanf@PLT +; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload +; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3] +; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill +; CHECK-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload +; CHECK-NEXT: # xmm0 = mem[1,0] +; CHECK-NEXT: callq tanf@PLT +; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload +; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3] +; CHECK-NEXT: addq $40, %rsp +; CHECK-NEXT: retq + %r = call <3 x float> @llvm.tan.v3f32(<3 x float> %x) + ret <3 x float> %r +} + +define <4 x float> @tan_v4f32(<4 x float> %x) nounwind { +; CHECK-LABEL: tan_v4f32: +; CHECK: # %bb.0: +; CHECK-NEXT: subq $40, %rsp +; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; CHECK-NEXT: callq tanf@PLT +; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill +; CHECK-NEXT: vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload +; CHECK-NEXT: # xmm0 = mem[1,1,3,3] +; CHECK-NEXT: callq tanf@PLT +; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload +; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3] +; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill +; CHECK-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload +; CHECK-NEXT: # xmm0 = mem[1,0] +; CHECK-NEXT: callq tanf@PLT +; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload +; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3] +; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill +; CHECK-NEXT: vpermilps $255, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload +; CHECK-NEXT: # xmm0 = mem[3,3,3,3] +; CHECK-NEXT: callq tanf@PLT +; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload +; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0] +; CHECK-NEXT: addq $40, %rsp +; CHECK-NEXT: retq + %r = call <4 x float> @llvm.tan.v4f32(<4 x float> %x) + ret <4 x float> %r +} + +define <5 x float> @tan_v5f32(<5 x float> %x) nounwind { +; CHECK-LABEL: tan_v5f32: +; CHECK: # %bb.0: +; CHECK-NEXT: subq $72, %rsp +; CHECK-NEXT: vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill +; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 +; CHECK-NEXT: vzeroupper +; CHECK-NEXT: callq tanf@PLT +; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill +; CHECK-NEXT: vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload +; CHECK-NEXT: # xmm0 = mem[1,1,3,3] +; CHECK-NEXT: callq tanf@PLT +; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload +; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3] +; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill +; CHECK-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload +; CHECK-NEXT: # xmm0 = mem[1,0] +; CHECK-NEXT: callq tanf@PLT +; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload +; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3] +; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill +; CHECK-NEXT: vpermilps $255, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload +; CHECK-NEXT: # xmm0 = mem[3,3,3,3] +; CHECK-NEXT: callq tanf@PLT +; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload +; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0] +; CHECK-NEXT: vmovups %ymm0, (%rsp) # 32-byte Spill +; CHECK-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload +; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0 +; CHECK-NEXT: vzeroupper +; CHECK-NEXT: callq tanf@PLT +; CHECK-NEXT: vmovups (%rsp), %ymm1 # 32-byte Reload +; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 +; CHECK-NEXT: addq $72, %rsp +; CHECK-NEXT: retq + %r = call <5 x float> @llvm.tan.v5f32(<5 x float> %x) + ret <5 x float> %r +} + +define <6 x float> @tan_v6f32(<6 x float> %x) nounwind { +; CHECK-LABEL: tan_v6f32: +; CHECK: # %bb.0: +; CHECK-NEXT: subq $72, %rsp +; CHECK-NEXT: vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill +; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0 +; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill +; CHECK-NEXT: vzeroupper +; CHECK-NEXT: callq tanf@PLT +; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; CHECK-NEXT: vmovshdup (%rsp), %xmm0 # 16-byte Folded Reload +; CHECK-NEXT: # xmm0 = mem[1,1,3,3] +; CHECK-NEXT: callq tanf@PLT +; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload +; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3] +; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; CHECK-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload +; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 +; CHECK-NEXT: vzeroupper +; CHECK-NEXT: callq tanf@PLT +; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill +; CHECK-NEXT: vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload +; CHECK-NEXT: # xmm0 = mem[1,1,3,3] +; CHECK-NEXT: callq tanf@PLT +; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload +; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3] +; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill +; CHECK-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload +; CHECK-NEXT: # xmm0 = mem[1,0] +; CHECK-NEXT: callq tanf@PLT +; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload +; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3] +; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill +; CHECK-NEXT: vpermilps $255, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload +; CHECK-NEXT: # xmm0 = mem[3,3,3,3] +; CHECK-NEXT: callq tanf@PLT +; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload +; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0] +; CHECK-NEXT: vinsertf128 $1, {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 16-byte Folded Reload +; CHECK-NEXT: addq $72, %rsp +; CHECK-NEXT: retq + %r = call <6 x float> @llvm.tan.v6f32(<6 x float> %x) + ret <6 x float> %r +} + +define <3 x double> @tan_v3f64(<3 x double> %x) nounwind { +; CHECK-LABEL: tan_v3f64: +; CHECK: # %bb.0: +; CHECK-NEXT: subq $72, %rsp +; CHECK-NEXT: vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill +; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 +; CHECK-NEXT: vzeroupper +; CHECK-NEXT: callq tan@PLT +; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill +; CHECK-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload +; CHECK-NEXT: # xmm0 = mem[1,0] +; CHECK-NEXT: callq tan@PLT +; CHECK-NEXT: vmovapd (%rsp), %xmm1 # 16-byte Reload +; CHECK-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0] +; CHECK-NEXT: vmovupd %ymm0, (%rsp) # 32-byte Spill +; CHECK-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload +; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0 +; CHECK-NEXT: vzeroupper +; CHECK-NEXT: callq tan@PLT +; CHECK-NEXT: vmovups (%rsp), %ymm1 # 32-byte Reload +; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 +; CHECK-NEXT: addq $72, %rsp +; CHECK-NEXT: retq + %r = call <3 x double> @llvm.tan.v3f64(<3 x double> %x) + ret <3 x double> %r +} define <2 x float> @fabs_v2f32(<2 x float> %x) nounwind { ; CHECK-LABEL: fabs_v2f32: ; CHECK: # %bb.0: