Index: docs/LangRef.rst =================================================================== --- docs/LangRef.rst +++ docs/LangRef.rst @@ -9952,6 +9952,42 @@ This function returns the same values as the libm ``exp2`` functions would, and handles error conditions in the same way. +'``llvm.ldexp.*``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +This is an overloaded intrinsic. You can use ``llvm.ldexp`` on any +floating point or vector of floating point type. Not all targets support +all types however. + +:: + + declare float @llvm.ldexp.f32(float %Val, i32 %Exp) + declare double @llvm.ldexp.f64(double %Val, i32 %Exp) + declare x86_fp80 @llvm.ldexp.f80(x86_fp80 %Val, i32 %Exp) + declare fp128 @llvm.ldexp.f128(fp128 %Val, i32 %Exp) + declare ppc_fp128 @llvm.ldexp.ppcf128(ppc_fp128 %Val, i32 %Exp) + +Overview: +""""""""" + +The '``llvm.ldexp.*``' intrinsics return the first operand multiplied by two raised to the power of the second operand. + +Arguments: +"""""""""" + +The first argument and the return value are floating point values (or vectors of floating point values) of the same +type. The second argument is a scalar 32-bit integer (``i32``), even when the first argument is a vector. + +Semantics: +"""""""""" + +This function multiplies the first argument by 2 raised to the second argument's +power. If the first argument is NaN or infinite, the same value is returned. +The behavior in case of a range error (underflow or overflow) is undefined.
+ '``llvm.log.*``' Intrinsic ^^^^^^^^^^^^^^^^^^^^^^^^^^ Index: include/llvm/Analysis/TargetLibraryInfo.h =================================================================== --- include/llvm/Analysis/TargetLibraryInfo.h +++ include/llvm/Analysis/TargetLibraryInfo.h @@ -233,6 +233,7 @@ case LibFunc::trunc: case LibFunc::truncf: case LibFunc::truncl: case LibFunc::log2: case LibFunc::log2f: case LibFunc::log2l: case LibFunc::exp2: case LibFunc::exp2f: case LibFunc::exp2l: + case LibFunc::ldexp: case LibFunc::ldexpf: case LibFunc::ldexpl: case LibFunc::memcmp: case LibFunc::strcmp: case LibFunc::strcpy: case LibFunc::stpcpy: case LibFunc::strlen: case LibFunc::strnlen: case LibFunc::memchr: Index: include/llvm/CodeGen/ISDOpcodes.h =================================================================== --- include/llvm/CodeGen/ISDOpcodes.h +++ include/llvm/CodeGen/ISDOpcodes.h @@ -525,6 +525,9 @@ /// when a single input is NaN, NaN is returned. FMINNAN, FMAXNAN, + /// FLDEXP - ldexp, inspired by libm (op0 * 2**op1). + FLDEXP, + /// FSINCOS - Compute both fsin and fcos as a single operation. 
FSINCOS, Index: include/llvm/CodeGen/RuntimeLibcalls.h =================================================================== --- include/llvm/CodeGen/RuntimeLibcalls.h +++ include/llvm/CodeGen/RuntimeLibcalls.h @@ -148,6 +148,11 @@ EXP2_F80, EXP2_F128, EXP2_PPCF128, + LDEXP_F32, + LDEXP_F64, + LDEXP_F80, + LDEXP_F128, + LDEXP_PPCF128, SIN_F32, SIN_F64, SIN_F80, Index: include/llvm/IR/Intrinsics.td =================================================================== --- include/llvm/IR/Intrinsics.td +++ include/llvm/IR/Intrinsics.td @@ -381,6 +381,7 @@ def int_log2 : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>; def int_exp : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>; def int_exp2 : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>; + def int_ldexp : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, llvm_i32_ty]>; def int_fabs : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>; def int_copysign : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, LLVMMatchType<0>]>; Index: include/llvm/Target/TargetSelectionDAG.td =================================================================== --- include/llvm/Target/TargetSelectionDAG.td +++ include/llvm/Target/TargetSelectionDAG.td @@ -153,6 +153,9 @@ def SDTFPToIntOp : SDTypeProfile<1, 1, [ // fp_to_[su]int SDTCisInt<0>, SDTCisFP<1> ]>; +def SDTFPExpOp : SDTypeProfile<1, 2, [ // ldexp + SDTCisSameAs<0, 1>, SDTCisFP<0>, SDTCisInt<2> +]>; def SDTExtInreg : SDTypeProfile<1, 2, [ // sext_inreg SDTCisSameAs<0, 1>, SDTCisInt<0>, SDTCisVT<2, OtherVT>, SDTCisVTSmallerThanOp<2, 1> @@ -447,6 +450,7 @@ def ffloor : SDNode<"ISD::FFLOOR" , SDTFPUnaryOp>; def fnearbyint : SDNode<"ISD::FNEARBYINT" , SDTFPUnaryOp>; def frnd : SDNode<"ISD::FROUND" , SDTFPUnaryOp>; +def fldexp : SDNode<"ISD::FLDEXP" , SDTFPExpOp>; def fround : SDNode<"ISD::FP_ROUND" , SDTFPRoundOp>; def fextend : SDNode<"ISD::FP_EXTEND" , SDTFPExtendOp>; Index: lib/Analysis/ValueTracking.cpp =================================================================== --- 
lib/Analysis/ValueTracking.cpp +++ lib/Analysis/ValueTracking.cpp @@ -2609,6 +2609,8 @@ return true; } return CannotBeOrderedLessThanZero(I->getOperand(0), Depth+1); + case Intrinsic::ldexp: + return CannotBeOrderedLessThanZero(I->getOperand(0), Depth+1); case Intrinsic::fma: case Intrinsic::fmuladd: // x*x+y is non-negative if y is non-negative. Index: lib/Analysis/VectorUtils.cpp =================================================================== --- lib/Analysis/VectorUtils.cpp +++ lib/Analysis/VectorUtils.cpp @@ -58,6 +58,7 @@ case Intrinsic::ctlz: case Intrinsic::cttz: case Intrinsic::powi: + case Intrinsic::ldexp: return true; default: return false; @@ -65,13 +66,14 @@ } /// \brief Identifies if the intrinsic has a scalar operand. It check for -/// ctlz,cttz and powi special intrinsics whose argument is scalar. +/// ctlz,cttz, powi and ldexp special intrinsics whose argument is scalar. bool llvm::hasVectorInstrinsicScalarOpd(Intrinsic::ID ID, unsigned ScalarOpdIdx) { switch (ID) { case Intrinsic::ctlz: case Intrinsic::cttz: case Intrinsic::powi: + case Intrinsic::ldexp: return (ScalarOpdIdx == 1); default: return false; Index: lib/CodeGen/SelectionDAG/LegalizeDAG.cpp =================================================================== --- lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -3388,6 +3388,23 @@ } break; } + + case ISD::FLDEXP: { + EVT VT = Node->getValueType(0); + if ((VT == MVT::f32 && TLI.getLibcallName(RTLIB::LDEXP_F32)) || + (VT == MVT::f64 && TLI.getLibcallName(RTLIB::LDEXP_F64)) || + (VT == MVT::f80 && TLI.getLibcallName(RTLIB::LDEXP_F80)) || + (VT == MVT::f128 && TLI.getLibcallName(RTLIB::LDEXP_F128)) || + (VT == MVT::ppcf128 && TLI.getLibcallName(RTLIB::LDEXP_PPCF128))) + break; /* use the LibCall instead, it is very likely faster */ + + Tmp1 = DAG.getNode(ISD::SINT_TO_FP, dl, VT, Node->getOperand(1)); + Tmp2 = DAG.getNode(ISD::FEXP2, dl, VT, Tmp1); + Tmp3 = DAG.getNode(ISD::FMUL, dl, VT, 
Node->getOperand(0), Tmp2); + Results.push_back(Tmp3); + break; + } + case ISD::FMAD: llvm_unreachable("Illegal fmad should never be formed"); @@ -4113,6 +4130,11 @@ RTLIB::ROUND_F128, RTLIB::ROUND_PPCF128)); break; + case ISD::FLDEXP: + Results.push_back(ExpandFPLibCall(Node, RTLIB::LDEXP_F32, RTLIB::LDEXP_F64, + RTLIB::LDEXP_F80, RTLIB::LDEXP_F128, + RTLIB::LDEXP_PPCF128)); + break; case ISD::FPOWI: Results.push_back(ExpandFPLibCall(Node, RTLIB::POWI_F32, RTLIB::POWI_F64, RTLIB::POWI_F80, RTLIB::POWI_F128, @@ -4427,6 +4449,7 @@ break; } case ISD::FCOPYSIGN: + case ISD::FLDEXP: case ISD::FPOWI: { Tmp1 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(0)); Tmp2 = Node->getOperand(1); Index: lib/CodeGen/SelectionDAG/LegalizeTypes.h =================================================================== --- lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -604,13 +604,13 @@ SDValue ScalarizeVecRes_TernaryOp(SDNode *N); SDValue ScalarizeVecRes_UnaryOp(SDNode *N); SDValue ScalarizeVecRes_InregOp(SDNode *N); + SDValue ScalarizeVecRes_ExpOp(SDNode *N); SDValue ScalarizeVecRes_BITCAST(SDNode *N); SDValue ScalarizeVecRes_BUILD_VECTOR(SDNode *N); SDValue ScalarizeVecRes_CONVERT_RNDSAT(SDNode *N); SDValue ScalarizeVecRes_EXTRACT_SUBVECTOR(SDNode *N); SDValue ScalarizeVecRes_FP_ROUND(SDNode *N); - SDValue ScalarizeVecRes_FPOWI(SDNode *N); SDValue ScalarizeVecRes_INSERT_VECTOR_ELT(SDNode *N); SDValue ScalarizeVecRes_LOAD(LoadSDNode *N); SDValue ScalarizeVecRes_SCALAR_TO_VECTOR(SDNode *N); @@ -653,13 +653,13 @@ void SplitVecRes_UnaryOp(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_ExtendOp(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_InregOp(SDNode *N, SDValue &Lo, SDValue &Hi); + void SplitVecRes_ExpOp(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_BUILD_VECTOR(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_CONCAT_VECTORS(SDNode *N, 
SDValue &Lo, SDValue &Hi); void SplitVecRes_EXTRACT_SUBVECTOR(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_INSERT_SUBVECTOR(SDNode *N, SDValue &Lo, SDValue &Hi); - void SplitVecRes_FPOWI(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_FCOPYSIGN(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_LOAD(LoadSDNode *N, SDValue &Lo, SDValue &Hi); @@ -731,7 +731,7 @@ SDValue WidenVecRes_BinaryCanTrap(SDNode *N); SDValue WidenVecRes_Convert(SDNode *N); SDValue WidenVecRes_FCOPYSIGN(SDNode *N); - SDValue WidenVecRes_POWI(SDNode *N); + SDValue WidenVecRes_ExpOp(SDNode *N); SDValue WidenVecRes_Shift(SDNode *N); SDValue WidenVecRes_Unary(SDNode *N); SDValue WidenVecRes_InregOp(SDNode *N); Index: lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp =================================================================== --- lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp +++ lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @@ -308,6 +308,7 @@ case ISD::FSQRT: case ISD::FSIN: case ISD::FCOS: + case ISD::FLDEXP: case ISD::FPOWI: case ISD::FPOW: case ISD::FLOG: Index: lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp =================================================================== --- lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -55,7 +55,8 @@ case ISD::EXTRACT_SUBVECTOR: R = ScalarizeVecRes_EXTRACT_SUBVECTOR(N); break; case ISD::FP_ROUND: R = ScalarizeVecRes_FP_ROUND(N); break; case ISD::FP_ROUND_INREG: R = ScalarizeVecRes_InregOp(N); break; - case ISD::FPOWI: R = ScalarizeVecRes_FPOWI(N); break; + case ISD::FLDEXP: + case ISD::FPOWI: R = ScalarizeVecRes_ExpOp(N); break; case ISD::INSERT_VECTOR_ELT: R = ScalarizeVecRes_INSERT_VECTOR_ELT(N); break; case ISD::LOAD: R = ScalarizeVecRes_LOAD(cast(N));break; case ISD::SCALAR_TO_VECTOR: R = ScalarizeVecRes_SCALAR_TO_VECTOR(N); break; @@ -203,10 +204,10 @@ NewVT, Op, N->getOperand(1)); } -SDValue 
DAGTypeLegalizer::ScalarizeVecRes_FPOWI(SDNode *N) { +SDValue DAGTypeLegalizer::ScalarizeVecRes_ExpOp(SDNode *N) { SDValue Op = GetScalarizedVector(N->getOperand(0)); - return DAG.getNode(ISD::FPOWI, SDLoc(N), - Op.getValueType(), Op, N->getOperand(1)); + return DAG.getNode(N->getOpcode(), SDLoc(N), Op.getValueType(), Op, + N->getOperand(1)); } SDValue DAGTypeLegalizer::ScalarizeVecRes_INSERT_VECTOR_ELT(SDNode *N) { @@ -600,7 +601,8 @@ case ISD::EXTRACT_SUBVECTOR: SplitVecRes_EXTRACT_SUBVECTOR(N, Lo, Hi); break; case ISD::INSERT_SUBVECTOR: SplitVecRes_INSERT_SUBVECTOR(N, Lo, Hi); break; case ISD::FP_ROUND_INREG: SplitVecRes_InregOp(N, Lo, Hi); break; - case ISD::FPOWI: SplitVecRes_FPOWI(N, Lo, Hi); break; + case ISD::FLDEXP: + case ISD::FPOWI: SplitVecRes_ExpOp(N, Lo, Hi); break; case ISD::FCOPYSIGN: SplitVecRes_FCOPYSIGN(N, Lo, Hi); break; case ISD::INSERT_VECTOR_ELT: SplitVecRes_INSERT_VECTOR_ELT(N, Lo, Hi); break; case ISD::SCALAR_TO_VECTOR: SplitVecRes_SCALAR_TO_VECTOR(N, Lo, Hi); break; @@ -874,12 +876,11 @@ false, false, false, MinAlign(Alignment, IncrementSize)); } -void DAGTypeLegalizer::SplitVecRes_FPOWI(SDNode *N, SDValue &Lo, - SDValue &Hi) { +void DAGTypeLegalizer::SplitVecRes_ExpOp(SDNode *N, SDValue &Lo, SDValue &Hi) { SDLoc dl(N); GetSplitVector(N->getOperand(0), Lo, Hi); - Lo = DAG.getNode(ISD::FPOWI, dl, Lo.getValueType(), Lo, N->getOperand(1)); - Hi = DAG.getNode(ISD::FPOWI, dl, Hi.getValueType(), Hi, N->getOperand(1)); + Lo = DAG.getNode(N->getOpcode(), dl, Lo.getValueType(), Lo, N->getOperand(1)); + Hi = DAG.getNode(N->getOpcode(), dl, Hi.getValueType(), Hi, N->getOperand(1)); } void DAGTypeLegalizer::SplitVecRes_FCOPYSIGN(SDNode *N, SDValue &Lo, @@ -2059,8 +2060,9 @@ Res = WidenVecRes_FCOPYSIGN(N); break; + case ISD::FLDEXP: case ISD::FPOWI: - Res = WidenVecRes_POWI(N); + Res = WidenVecRes_ExpOp(N); break; case ISD::SHL: @@ -2366,7 +2368,7 @@ return DAG.UnrollVectorOp(N, WidenVT.getVectorNumElements()); } -SDValue 
DAGTypeLegalizer::WidenVecRes_POWI(SDNode *N) { +SDValue DAGTypeLegalizer::WidenVecRes_ExpOp(SDNode *N) { EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue InOp = GetWidenedVector(N->getOperand(0)); SDValue ShOp = N->getOperand(1); Index: lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h =================================================================== --- lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h +++ lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h @@ -843,6 +843,7 @@ bool visitStrNLenCall(const CallInst &I); bool visitUnaryFloatCall(const CallInst &I, unsigned Opcode); bool visitBinaryFloatCall(const CallInst &I, unsigned Opcode); + bool visitLdExpCall(const CallInst &I); void visitAtomicLoad(const LoadInst &I); void visitAtomicStore(const StoreInst &I); Index: lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp =================================================================== --- lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -4263,6 +4263,12 @@ return DAG.getNode(ISD::FPOWI, DL, LHS.getValueType(), LHS, RHS); } +/// ExpandLdExp - Expand a llvm.ldexp intrinsic. +static SDValue ExpandLdExp(SDLoc DL, SDValue Op1, SDValue Op2, + SelectionDAG &DAG) { + return DAG.getNode(ISD::FLDEXP, DL, Op1.getValueType(), Op1, Op2); +} + // getUnderlyingArgReg - Find underlying register used for a truncated or // bitcasted argument. 
static unsigned getUnderlyingArgReg(const SDValue &N) { @@ -4766,6 +4772,10 @@ setValue(&I, ExpandPowI(sdl, getValue(I.getArgOperand(0)), getValue(I.getArgOperand(1)), DAG)); return nullptr; + case Intrinsic::ldexp: + setValue(&I, ExpandLdExp(sdl, getValue(I.getArgOperand(0)), + getValue(I.getArgOperand(1)), DAG)); + return nullptr; case Intrinsic::log: setValue(&I, expandLog(sdl, getValue(I.getArgOperand(0)), DAG, TLI)); return nullptr; @@ -5778,6 +5788,22 @@ return true; } +/// visitLdExpCall - If a call instruction fits a ldexp call (as expected), +/// translate it to an SDNode with opcode FLDEXP and return true. +bool SelectionDAGBuilder::visitLdExpCall(const CallInst &I) { + if (I.getNumArgOperands() != 2 || + !I.getArgOperand(0)->getType()->isFloatingPointTy() || + !I.getArgOperand(1)->getType()->isIntegerTy() || + I.getType() != I.getArgOperand(0)->getType() || !I.onlyReadsMemory()) + return false; + + SDValue Tmp0 = getValue(I.getArgOperand(0)); + SDValue Tmp1 = getValue(I.getArgOperand(1)); + EVT VT = Tmp0.getValueType(); + setValue(&I, DAG.getNode(ISD::FLDEXP, getCurSDLoc(), VT, Tmp0, Tmp1)); + return true; +} + void SelectionDAGBuilder::visitCall(const CallInst &I) { // Handle inline assembly differently. 
if (isa(I.getCalledValue())) { @@ -5915,6 +5941,12 @@ if (visitUnaryFloatCall(I, ISD::FEXP2)) return; break; + case LibFunc::ldexp: + case LibFunc::ldexpf: + case LibFunc::ldexpl: + if (visitLdExpCall(I)) + return; + break; case LibFunc::memcmp: if (visitMemCmpCall(I)) return; Index: lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp =================================================================== --- lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp +++ lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp @@ -208,6 +208,7 @@ case ISD::UMIN: return "umin"; case ISD::UMAX: return "umax"; + case ISD::FLDEXP: return "fldexp"; case ISD::FPOWI: return "fpowi"; case ISD::SETCC: return "setcc"; case ISD::SETCCE: return "setcce"; Index: lib/CodeGen/TargetLoweringBase.cpp =================================================================== --- lib/CodeGen/TargetLoweringBase.cpp +++ lib/CodeGen/TargetLoweringBase.cpp @@ -166,6 +166,11 @@ Names[RTLIB::EXP2_F80] = "exp2l"; Names[RTLIB::EXP2_F128] = "exp2l"; Names[RTLIB::EXP2_PPCF128] = "exp2l"; + Names[RTLIB::LDEXP_F32] = "ldexpf"; + Names[RTLIB::LDEXP_F64] = "ldexp"; + Names[RTLIB::LDEXP_F80] = "ldexpl"; + Names[RTLIB::LDEXP_F128] = "ldexpl"; + Names[RTLIB::LDEXP_PPCF128] = "ldexpl"; Names[RTLIB::SIN_F32] = "sinf"; Names[RTLIB::SIN_F64] = "sin"; Names[RTLIB::SIN_F80] = "sinl"; @@ -435,6 +440,13 @@ Names[RTLIB::FPEXT_F16_F32] = "__extendhfsf2"; Names[RTLIB::FPROUND_F32_F16] = "__truncsfhf2"; } + + if (TT.isOSWindows() && !TT.isOSCygMing()) { + Names[RTLIB::LDEXP_F32] = nullptr; + Names[RTLIB::LDEXP_F80] = nullptr; + Names[RTLIB::LDEXP_F128] = nullptr; + Names[RTLIB::LDEXP_PPCF128] = nullptr; + } } /// InitLibcallCallingConvs - Set default libcall CallingConvs. 
@@ -864,6 +876,7 @@ setOperationAction(ISD::FLOG10, VT, Expand); setOperationAction(ISD::FEXP , VT, Expand); setOperationAction(ISD::FEXP2, VT, Expand); + setOperationAction(ISD::FLDEXP, VT, Expand); setOperationAction(ISD::FFLOOR, VT, Expand); setOperationAction(ISD::FMINNUM, VT, Expand); setOperationAction(ISD::FMAXNUM, VT, Expand); Index: lib/Target/AArch64/AArch64ISelLowering.cpp =================================================================== --- lib/Target/AArch64/AArch64ISelLowering.cpp +++ lib/Target/AArch64/AArch64ISelLowering.cpp @@ -301,6 +301,7 @@ setOperationAction(ISD::FNEARBYINT, MVT::f16, Promote); setOperationAction(ISD::FPOW, MVT::f16, Promote); setOperationAction(ISD::FPOWI, MVT::f16, Promote); + setOperationAction(ISD::FLDEXP, MVT::f16, Promote); setOperationAction(ISD::FRINT, MVT::f16, Promote); setOperationAction(ISD::FSIN, MVT::f16, Promote); setOperationAction(ISD::FSINCOS, MVT::f16, Promote); @@ -345,6 +346,7 @@ setOperationAction(ISD::FNEG, MVT::v4f16, Expand); setOperationAction(ISD::FPOW, MVT::v4f16, Expand); setOperationAction(ISD::FPOWI, MVT::v4f16, Expand); + setOperationAction(ISD::FLDEXP, MVT::v4f16, Expand); setOperationAction(ISD::FREM, MVT::v4f16, Expand); setOperationAction(ISD::FROUND, MVT::v4f16, Expand); setOperationAction(ISD::FRINT, MVT::v4f16, Expand); @@ -377,6 +379,7 @@ setOperationAction(ISD::FNEG, MVT::v8f16, Expand); setOperationAction(ISD::FPOW, MVT::v8f16, Expand); setOperationAction(ISD::FPOWI, MVT::v8f16, Expand); + setOperationAction(ISD::FLDEXP, MVT::v8f16, Expand); setOperationAction(ISD::FREM, MVT::v8f16, Expand); setOperationAction(ISD::FROUND, MVT::v8f16, Expand); setOperationAction(ISD::FRINT, MVT::v8f16, Expand); @@ -659,6 +662,7 @@ setOperationAction(ISD::FCOS, VT.getSimpleVT(), Expand); setOperationAction(ISD::FPOWI, VT.getSimpleVT(), Expand); setOperationAction(ISD::FPOW, VT.getSimpleVT(), Expand); + setOperationAction(ISD::FLDEXP, VT.getSimpleVT(), Expand); setOperationAction(ISD::FLOG, 
VT.getSimpleVT(), Expand); setOperationAction(ISD::FLOG2, VT.getSimpleVT(), Expand); setOperationAction(ISD::FLOG10, VT.getSimpleVT(), Expand); Index: lib/Target/AMDGPU/AMDGPUISelLowering.h =================================================================== --- lib/Target/AMDGPU/AMDGPUISelLowering.h +++ lib/Target/AMDGPU/AMDGPUISelLowering.h @@ -271,7 +271,6 @@ RSQ, RSQ_LEGACY, RSQ_CLAMPED, - LDEXP, FP_CLASS, DOT4, CARRY, Index: lib/Target/AMDGPU/AMDGPUISelLowering.cpp =================================================================== --- lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -98,6 +98,9 @@ setOperationAction(ISD::FREM, MVT::f32, Custom); setOperationAction(ISD::FREM, MVT::f64, Custom); + setOperationAction(ISD::FLDEXP, MVT::f32, Legal); + setOperationAction(ISD::FLDEXP, MVT::f64, Legal); + // v_mad_f32 does not support denormals according to some sources. if (!Subtarget->hasFP32Denormals()) setOperationAction(ISD::FMAD, MVT::f32, Legal); @@ -356,6 +359,8 @@ setOperationAction(ISD::FLOG2, VT, Expand); setOperationAction(ISD::FREM, VT, Expand); setOperationAction(ISD::FPOW, VT, Expand); + setOperationAction(ISD::FPOWI, VT, Expand); + setOperationAction(ISD::FLDEXP, VT, Expand); setOperationAction(ISD::FFLOOR, VT, Expand); setOperationAction(ISD::FTRUNC, VT, Expand); setOperationAction(ISD::FMUL, VT, Expand); @@ -917,8 +922,8 @@ Op.getOperand(1), Op.getOperand(2), Op.getOperand(3)); case Intrinsic::AMDGPU_ldexp: // Legacy name - return DAG.getNode(AMDGPUISD::LDEXP, DL, VT, Op.getOperand(1), - Op.getOperand(2)); + return DAG.getNode(ISD::FLDEXP, DL, VT, Op.getOperand(1), + Op.getOperand(2)); case AMDGPUIntrinsic::AMDGPU_bfe_i32: return DAG.getNode(AMDGPUISD::BFE_I32, DL, VT, @@ -2123,7 +2128,7 @@ SDValue CvtLo = DAG.getNode(ISD::UINT_TO_FP, SL, MVT::f64, Lo); - SDValue LdExp = DAG.getNode(AMDGPUISD::LDEXP, SL, MVT::f64, CvtHi, + SDValue LdExp = DAG.getNode(ISD::FLDEXP, SL, MVT::f64, CvtHi, DAG.getConstant(32, SL, 
MVT::i32)); // TODO: Should this propagate fast-math-flags? return DAG.getNode(ISD::FADD, SL, MVT::f64, LdExp, CvtLo); @@ -2850,7 +2855,6 @@ NODE_NAME_CASE(RSQ) NODE_NAME_CASE(RSQ_LEGACY) NODE_NAME_CASE(RSQ_CLAMPED) - NODE_NAME_CASE(LDEXP) NODE_NAME_CASE(FP_CLASS) NODE_NAME_CASE(DOT4) NODE_NAME_CASE(CARRY) Index: lib/Target/AMDGPU/AMDGPUInstrInfo.td =================================================================== --- lib/Target/AMDGPU/AMDGPUInstrInfo.td +++ lib/Target/AMDGPU/AMDGPUInstrInfo.td @@ -65,8 +65,6 @@ // out = 1.0 / sqrt(a) result clamped to +/- max_float. def AMDGPUrsq_clamped : SDNode<"AMDGPUISD::RSQ_CLAMPED", SDTFPUnaryOp>; -def AMDGPUldexp : SDNode<"AMDGPUISD::LDEXP", AMDGPULdExpOp>; - def AMDGPUfp_class : SDNode<"AMDGPUISD::FP_CLASS", AMDGPUFPClassOp>; // out = max(a, b) a and b are floats, where a nan comparison fails. Index: lib/Target/AMDGPU/SIISelLowering.cpp =================================================================== --- lib/Target/AMDGPU/SIISelLowering.cpp +++ lib/Target/AMDGPU/SIISelLowering.cpp @@ -1499,7 +1499,7 @@ Glue); } case Intrinsic::amdgcn_ldexp: - return DAG.getNode(AMDGPUISD::LDEXP, DL, VT, + return DAG.getNode(ISD::FLDEXP, DL, VT, Op.getOperand(1), Op.getOperand(2)); case Intrinsic::amdgcn_class: return DAG.getNode(AMDGPUISD::FP_CLASS, DL, VT, Index: lib/Target/AMDGPU/SIInstructions.td =================================================================== --- lib/Target/AMDGPU/SIInstructions.td +++ lib/Target/AMDGPU/SIInstructions.td @@ -1592,7 +1592,7 @@ VOP_I32_I32_I32, int_amdgcn_mbcnt_hi >; defm V_LDEXP_F32 : VOP2_VI3_Inst , "v_ldexp_f32", - VOP_F32_F32_I32, AMDGPUldexp + VOP_F32_F32_I32, fldexp >; defm V_CVT_PKACCUM_U8_F32 : VOP2_VI3_Inst , "v_cvt_pkaccum_u8_f32", @@ -1743,7 +1743,7 @@ } // End isCommutable = 1 defm V_LDEXP_F64 : VOP3Inst , "v_ldexp_f64", - VOP_F64_F64_I32, AMDGPUldexp + VOP_F64_F64_I32, fldexp >; } // End let SchedRW = [WriteDoubleAdd] Index: lib/Target/ARM/ARMISelLowering.cpp 
=================================================================== --- lib/Target/ARM/ARMISelLowering.cpp +++ lib/Target/ARM/ARMISelLowering.cpp @@ -501,6 +501,7 @@ setOperationAction(ISD::FSQRT, MVT::v2f64, Expand); setOperationAction(ISD::FSIN, MVT::v2f64, Expand); setOperationAction(ISD::FCOS, MVT::v2f64, Expand); + setOperationAction(ISD::FLDEXP, MVT::v2f64, Expand); setOperationAction(ISD::FPOWI, MVT::v2f64, Expand); setOperationAction(ISD::FPOW, MVT::v2f64, Expand); setOperationAction(ISD::FLOG, MVT::v2f64, Expand); @@ -519,6 +520,7 @@ setOperationAction(ISD::FSQRT, MVT::v4f32, Expand); setOperationAction(ISD::FSIN, MVT::v4f32, Expand); setOperationAction(ISD::FCOS, MVT::v4f32, Expand); + setOperationAction(ISD::FLDEXP, MVT::v4f32, Expand); setOperationAction(ISD::FPOWI, MVT::v4f32, Expand); setOperationAction(ISD::FPOW, MVT::v4f32, Expand); setOperationAction(ISD::FLOG, MVT::v4f32, Expand); @@ -536,6 +538,7 @@ setOperationAction(ISD::FSQRT, MVT::v2f32, Expand); setOperationAction(ISD::FSIN, MVT::v2f32, Expand); setOperationAction(ISD::FCOS, MVT::v2f32, Expand); + setOperationAction(ISD::FLDEXP, MVT::v2f32, Expand); setOperationAction(ISD::FPOWI, MVT::v2f32, Expand); setOperationAction(ISD::FPOW, MVT::v2f32, Expand); setOperationAction(ISD::FLOG, MVT::v2f32, Expand); Index: lib/Target/Hexagon/HexagonISelLowering.cpp =================================================================== --- lib/Target/Hexagon/HexagonISelLowering.cpp +++ lib/Target/Hexagon/HexagonISelLowering.cpp @@ -1759,7 +1759,7 @@ ISD::FCOS, ISD::FPOWI, ISD::FPOW, ISD::FLOG, ISD::FLOG2, ISD::FLOG10, ISD::FEXP, ISD::FEXP2, ISD::FCEIL, ISD::FTRUNC, ISD::FRINT, ISD::FNEARBYINT, ISD::FROUND, ISD::FFLOOR, - ISD::FMINNUM, ISD::FMAXNUM, ISD::FSINCOS, + ISD::FMINNUM, ISD::FMAXNUM, ISD::FSINCOS, ISD::FLDEXP, // Misc: ISD::SELECT, ISD::ConstantPool, // Vector: Index: lib/Target/PowerPC/PPCISelLowering.cpp =================================================================== --- 
lib/Target/PowerPC/PPCISelLowering.cpp +++ lib/Target/PowerPC/PPCISelLowering.cpp @@ -462,6 +462,7 @@ setOperationAction(ISD::FCOS, VT, Expand); setOperationAction(ISD::FABS, VT, Expand); setOperationAction(ISD::FPOWI, VT, Expand); + setOperationAction(ISD::FLDEXP, VT, Expand); setOperationAction(ISD::FFLOOR, VT, Expand); setOperationAction(ISD::FCEIL, VT, Expand); setOperationAction(ISD::FTRUNC, VT, Expand); @@ -696,6 +697,7 @@ setOperationAction(ISD::FCOS , MVT::v4f64, Expand); setOperationAction(ISD::FPOWI , MVT::v4f64, Expand); setOperationAction(ISD::FPOW , MVT::v4f64, Expand); + setOperationAction(ISD::FLDEXP, MVT::v4f64, Expand); setOperationAction(ISD::FLOG , MVT::v4f64, Expand); setOperationAction(ISD::FLOG2 , MVT::v4f64, Expand); setOperationAction(ISD::FLOG10 , MVT::v4f64, Expand); @@ -742,6 +744,7 @@ setOperationAction(ISD::FCOS , MVT::v4f32, Expand); setOperationAction(ISD::FPOWI , MVT::v4f32, Expand); setOperationAction(ISD::FPOW , MVT::v4f32, Expand); + setOperationAction(ISD::FLDEXP , MVT::v4f32, Expand); setOperationAction(ISD::FLOG , MVT::v4f32, Expand); setOperationAction(ISD::FLOG2 , MVT::v4f32, Expand); setOperationAction(ISD::FLOG10 , MVT::v4f32, Expand); Index: lib/Target/X86/X86ISelLowering.cpp =================================================================== --- lib/Target/X86/X86ISelLowering.cpp +++ lib/Target/X86/X86ISelLowering.cpp @@ -710,6 +710,7 @@ setOperationAction(ISD::FREM, VT, Expand); setOperationAction(ISD::FMA, VT, Expand); setOperationAction(ISD::FPOWI, VT, Expand); + setOperationAction(ISD::FLDEXP, VT, Expand); setOperationAction(ISD::FSQRT, VT, Expand); setOperationAction(ISD::FCOPYSIGN, VT, Expand); setOperationAction(ISD::FFLOOR, VT, Expand); Index: lib/Transforms/Utils/SimplifyLibCalls.cpp =================================================================== --- lib/Transforms/Utils/SimplifyLibCalls.cpp +++ lib/Transforms/Utils/SimplifyLibCalls.cpp @@ -1298,13 +1298,19 @@ Value *Op = CI->getArgOperand(0); // Turn 
exp2(sitofp(x)) -> ldexp(1.0, sext(x)) if sizeof(x) <= 32 // Turn exp2(uitofp(x)) -> ldexp(1.0, zext(x)) if sizeof(x) < 32 + bool TryLdExp; LibFunc::Func LdExp = LibFunc::ldexpl; - if (Op->getType()->isFloatTy()) - LdExp = LibFunc::ldexpf; - else if (Op->getType()->isDoubleTy()) - LdExp = LibFunc::ldexp; + if (Callee->isIntrinsic()) { + TryLdExp = true; + } else { + if (Op->getType()->isFloatTy()) + LdExp = LibFunc::ldexpf; + else if (Op->getType()->isDoubleTy()) + LdExp = LibFunc::ldexp; + TryLdExp = TLI->has(LdExp); + } - if (TLI->has(LdExp)) { + if (TryLdExp) { Value *LdExpArg = nullptr; if (SIToFPInst *OpC = dyn_cast(Op)) { if (OpC->getOperand(0)->getType()->getPrimitiveSizeInBits() <= 32) @@ -1320,14 +1326,19 @@ One = ConstantExpr::getFPExtend(One, Op->getType()); Module *M = CI->getModule(); - Value *NewCallee = - M->getOrInsertFunction(TLI->getName(LdExp), Op->getType(), - Op->getType(), B.getInt32Ty(), nullptr); - CallInst *CI = B.CreateCall(NewCallee, {One, LdExpArg}); - if (const Function *F = dyn_cast(Callee->stripPointerCasts())) - CI->setCallingConv(F->getCallingConv()); - - return CI; + if (Callee->isIntrinsic()) { + Function *F = + Intrinsic::getDeclaration(M, Intrinsic::ldexp, Op->getType()); + return B.CreateCall(F, {One, LdExpArg}); + } else { + Value *NewCallee = + M->getOrInsertFunction(TLI->getName(LdExp), Op->getType(), + Op->getType(), B.getInt32Ty(), nullptr); + CallInst *CI = B.CreateCall(NewCallee, {One, LdExpArg}); + if (const Function *F = dyn_cast(Callee->stripPointerCasts())) + CI->setCallingConv(F->getCallingConv()); + return CI; + } } } return Ret; Index: test/CodeGen/AMDGPU/llvm.ldexp.ll =================================================================== --- /dev/null +++ test/CodeGen/AMDGPU/llvm.ldexp.ll @@ -0,0 +1,93 @@ +; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s + +; SI-LABEL: 
{{^}}test_ldexp_f32: +; SI: v_ldexp_f32 +; SI: s_endpgm +define void @test_ldexp_f32(float addrspace(1)* %out, float %a, i32 %b) nounwind { + %result = call float @llvm.ldexp.f32(float %a, i32 %b) nounwind readnone + store float %result, float addrspace(1)* %out, align 4 + ret void +} + +; SI-LABEL: {{^}}test_ldexp_v2f32: +; SI: v_ldexp_f32 +; SI: v_ldexp_f32 +; SI: s_endpgm +define void @test_ldexp_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %a, i32 %b) nounwind { + %result = call <2 x float> @llvm.ldexp.v2f32(<2 x float> %a, i32 %b) nounwind readnone + store <2 x float> %result, <2 x float> addrspace(1)* %out, align 4 + ret void +} + +; SI-LABEL: {{^}}test_ldexp_v3f32: +; SI: v_ldexp_f32 +; SI: v_ldexp_f32 +; SI: v_ldexp_f32 +; SI: s_endpgm +define void @test_ldexp_v3f32(<3 x float> addrspace(1)* %out, <3 x float> %a, i32 %b) nounwind { + %result = call <3 x float> @llvm.ldexp.v3f32(<3 x float> %a, i32 %b) nounwind readnone + store <3 x float> %result, <3 x float> addrspace(1)* %out, align 4 + ret void +} + +; SI-LABEL: {{^}}test_ldexp_v4f32: +; SI: v_ldexp_f32 +; SI: v_ldexp_f32 +; SI: v_ldexp_f32 +; SI: v_ldexp_f32 +; SI: s_endpgm +define void @test_ldexp_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %a, i32 %b) nounwind { + %result = call <4 x float> @llvm.ldexp.v4f32(<4 x float> %a, i32 %b) nounwind readnone + store <4 x float> %result, <4 x float> addrspace(1)* %out, align 4 + ret void +} + +; SI-LABEL: {{^}}test_ldexp_f64: +; SI: v_ldexp_f64 +; SI: s_endpgm +define void @test_ldexp_f64(double addrspace(1)* %out, double %a, i32 %b) nounwind { + %result = call double @llvm.ldexp.f64(double %a, i32 %b) nounwind readnone + store double %result, double addrspace(1)* %out, align 8 + ret void +} + +; SI-LABEL: {{^}}test_ldexp_v2f64: +; SI: v_ldexp_f64 +; SI: v_ldexp_f64 +; SI: s_endpgm +define void @test_ldexp_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %a, i32 %b) nounwind { + %result = call <2 x double> @llvm.ldexp.v2f64(<2 x double> %a, i32 
%b) nounwind readnone + store <2 x double> %result, <2 x double> addrspace(1)* %out, align 4 + ret void +} + +; SI-LABEL: {{^}}legacy_ldexp_f32: +; SI: v_ldexp_f32 +; SI: s_endpgm +define void @legacy_ldexp_f32(float addrspace(1)* %out, float %a, i32 %b) nounwind { + %result = call float @llvm.AMDGPU.ldexp.f32(float %a, i32 %b) nounwind readnone + store float %result, float addrspace(1)* %out, align 4 + ret void +} + +; SI-LABEL: {{^}}legacy_ldexp_f64: +; SI: v_ldexp_f64 +; SI: s_endpgm +define void @legacy_ldexp_f64(double addrspace(1)* %out, double %a, i32 %b) nounwind { + %result = call double @llvm.AMDGPU.ldexp.f64(double %a, i32 %b) nounwind readnone + store double %result, double addrspace(1)* %out, align 8 + ret void +} + +declare float @llvm.ldexp.f32(float, i32) #1 +declare <2 x float> @llvm.ldexp.v2f32(<2 x float>, i32) #1 +declare <3 x float> @llvm.ldexp.v3f32(<3 x float>, i32) #1 +declare <4 x float> @llvm.ldexp.v4f32(<4 x float>, i32) #1 +declare double @llvm.ldexp.f64(double, i32) #1 +declare <2 x double> @llvm.ldexp.v2f64(<2 x double>, i32) #1 + +declare float @llvm.AMDGPU.ldexp.f32(float, i32) #1 +declare double @llvm.AMDGPU.ldexp.f64(double, i32) #1 + +attributes #1 = { nounwind readnone } Index: test/CodeGen/X86/ldexp.ll =================================================================== --- /dev/null +++ test/CodeGen/X86/ldexp.ll @@ -0,0 +1,43 @@ +; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s +; RUN: llc < %s -mtriple=i386-pc-win32 | FileCheck %s -check-prefix=CHECK-WIN + +; CHECK-LABEL: ldexp_f32: +; CHECK-WIN-LABEL: ldexp_f32: +; CHECK: jmp ldexpf +; CHECK-WIN-NOT: ldexpf +define float @ldexp_f32(i8 zeroext %x) { + %1 = zext i8 %x to i32 + %2 = call float @llvm.ldexp.f32(float 1.000000e+00, i32 %1) + ret float %2 +} + +; CHECK-LABEL: ldexp_f64: +; CHECK-WIN-LABEL: ldexp_f64: +; CHECK: jmp ldexp +; CHECK-WIN: calll _ldexp +define double @ldexp_f64(i8 zeroext %x) { + %1 = zext i8 %x to i32 + %2 = call double 
@llvm.ldexp.f64(double 1.000000e+00, i32 %1) + ret double %2 +} + +; CHECK-LABEL: ldexp_v2f32: +; CHECK-WIN-LABEL: ldexp_v2f32: +; CHECK: ldexpf +; CHECK: ldexpf +; CHECK-WIN-NOT: ldexpf +define <2 x float> @ldexp_v2f32(<2 x float> %val, i32 %exp) { + %1 = call <2 x float> @llvm.ldexp.v2f32(<2 x float> %val, i32 %exp) + ret <2 x float> %1 +} + +; Function Attrs: nounwind readnone +declare double @llvm.ldexp.f64(double, i32) #0 + +; Function Attrs: nounwind readnone +declare float @llvm.ldexp.f32(float, i32) #0 + +; Function Attrs: nounwind readnone +declare <2 x float> @llvm.ldexp.v2f32(<2 x float>, i32) #0 + +attributes #0 = { nounwind readnone } Index: test/Transforms/InstCombine/exp2-1.ll =================================================================== --- test/Transforms/InstCombine/exp2-1.ll +++ test/Transforms/InstCombine/exp2-1.ll @@ -1,8 +1,8 @@ ; Test that the exp2 library call simplifier works correctly. ; -; RUN: opt < %s -instcombine -S | FileCheck %s -check-prefix=CHECK -check-prefix=INTRINSIC -check-prefix=LDEXP -check-prefix=LDEXPF -; RUN: opt < %s -instcombine -S -mtriple=i386-pc-win32 | FileCheck %s -check-prefix=INTRINSIC -check-prefix=LDEXP -check-prefix=NOLDEXPF -; RUN: opt < %s -instcombine -S -mtriple=amdgcn-unknown-unknown | FileCheck %s -check-prefix=INTRINSIC -check-prefix=NOLDEXP -check-prefix=NOLDEXPF +; RUN: opt < %s -instcombine -S | FileCheck %s -check-prefix=CHECK -check-prefix=LDEXP -check-prefix=LDEXPF +; RUN: opt < %s -instcombine -S -mtriple=i386-pc-win32 | FileCheck %s -check-prefix=CHECK -check-prefix=LDEXP -check-prefix=NOLDEXPF +; RUN: opt < %s -instcombine -S -mtriple=amdgcn-unknown-unknown | FileCheck %s -check-prefix=CHECK -check-prefix=NOLDEXP -check-prefix=NOLDEXPF target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" @@ -15,7 +15,8 @@ ; CHECK-LABEL: @test_simplify1( %conv = sitofp i32 %x to double %ret = call double @exp2(double 
%conv) -; CHECK: call double @ldexp +; LDEXP: call double @ldexp +; NOLDEXP: call double @exp2 ret double %ret } @@ -23,7 +24,8 @@ ; CHECK-LABEL: @test_simplify2( %conv = sitofp i16 %x to double %ret = call double @exp2(double %conv) -; CHECK: call double @ldexp +; LDEXP: call double @ldexp +; NOLDEXP: call double @exp2 ret double %ret } @@ -31,7 +33,8 @@ ; CHECK-LABEL: @test_simplify3( %conv = sitofp i8 %x to double %ret = call double @exp2(double %conv) -; CHECK: call double @ldexp +; LDEXP: call double @ldexp +; NOLDEXP: call double @exp2 ret double %ret } @@ -39,7 +42,8 @@ ; CHECK-LABEL: @test_simplify4( %conv = sitofp i32 %x to float %ret = call float @exp2f(float %conv) -; CHECK: call float @ldexpf +; LDEXPF: call float @ldexpf +; NOLDEXPF: call float @exp2f ret float %ret } @@ -49,7 +53,8 @@ ; CHECK-LABEL: @test_no_simplify1( %conv = uitofp i32 %x to double %ret = call double @exp2(double %conv) -; CHECK: call double @exp2 +; LDEXP: call double @exp2 +; NOLDEXP: call double @exp2 ret double %ret } @@ -57,7 +62,8 @@ ; CHECK-LABEL: @test_simplify6( %conv = uitofp i16 %x to double %ret = call double @exp2(double %conv) -; CHECK: call double @ldexp +; LDEXP: call double @ldexp +; NOLDEXP: call double @exp2 ret double %ret } @@ -65,7 +71,8 @@ ; CHECK-LABEL: @test_simplify7( %conv = uitofp i8 %x to double %ret = call double @exp2(double %conv) -; CHECK: call double @ldexp +; LDEXP: call double @ldexp +; NOLDEXP: call double @exp2 ret double %ret } @@ -73,7 +80,8 @@ ; CHECK-LABEL: @test_simplify8( %conv = uitofp i8 %x to float %ret = call float @exp2f(float %conv) -; CHECK: call float @ldexpf +; LDEXPF: call float @ldexpf +; NOLDEXPF: call float @exp2f ret float %ret } @@ -81,19 +89,17 @@ declare float @llvm.exp2.f32(float) define double @test_simplify9(i8 zeroext %x) { -; INTRINSIC-LABEL: @test_simplify9( +; CHECK-LABEL: @test_simplify9( %conv = uitofp i8 %x to 
double %ret = call double @llvm.exp2.f64(double %conv) -; LDEXP: call double @ldexp -; NOLDEXP-NOT: call double @ldexp +; CHECK: call double @llvm.ldexp.f64 ret double %ret } define float @test_simplify10(i8 zeroext %x) { -; INTRINSIC-LABEL: @test_simplify10( +; CHECK-LABEL: @test_simplify10( %conv = uitofp i8 %x to float %ret = call float @llvm.exp2.f32(float %conv) -; LDEXPF: call float @ldexpf -; NOLDEXPF-NOT: call float @ldexpf +; CHECK: call float @llvm.ldexp.f32 ret float %ret } Index: test/Transforms/InstSimplify/floating-point-compare.ll =================================================================== --- test/Transforms/InstSimplify/floating-point-compare.ll +++ test/Transforms/InstSimplify/floating-point-compare.ll @@ -12,16 +12,18 @@ declare float @llvm.maxnum.f32(float, float) declare double @llvm.exp2.f64(double) declare float @llvm.fma.f32(float,float,float) +declare float @llvm.ldexp.f32(float,i32) declare void @expect_equal(i1,i1) ; CHECK-LABEL: @orderedLessZeroTree( -define i1 @orderedLessZeroTree(float,float,float,float) { +define i1 @orderedLessZeroTree(float,float,float,float,i32) { %square = fmul float %0, %0 + %ldexp = call float @llvm.ldexp.f32(float %square, i32 %4) %abs = call float @llvm.fabs.f32(float %1) %sqrt = call float @llvm.sqrt.f32(float %2) %fma = call float @llvm.fma.f32(float %3, float %3, float %sqrt) - %div = fdiv float %square, %abs + %div = fdiv float %ldexp, %abs %rem = frem float %sqrt, %fma %add = fadd float %div, %rem %uge = fcmp uge float %add, 0.000000e+00 Index: test/Transforms/SLPVectorizer/X86/intrinsic.ll =================================================================== --- test/Transforms/SLPVectorizer/X86/intrinsic.ll +++ test/Transforms/SLPVectorizer/X86/intrinsic.ll @@ -384,3 +384,92 @@ ; CHECK-LABEL: @vec_powi_f32_neg( ; CHECK-NOT: call <4 x float> @llvm.powi.v4f32 } + + +declare float @llvm.ldexp.f32(float, i32) +define void @vec_ldexp_f32(float* %a, float* %b, float* %c, i32 %P) { +entry: + %i0 = load 
float, float* %a, align 4 + %i1 = load float, float* %b, align 4 + %add1 = fadd float %i0, %i1 + %call1 = tail call float @llvm.ldexp.f32(float %add1, i32 %P) nounwind readnone + + %arrayidx2 = getelementptr inbounds float, float* %a, i32 1 + %i2 = load float, float* %arrayidx2, align 4 + %arrayidx3 = getelementptr inbounds float, float* %b, i32 1 + %i3 = load float, float* %arrayidx3, align 4 + %add2 = fadd float %i2, %i3 + %call2 = tail call float @llvm.ldexp.f32(float %add2, i32 %P) nounwind readnone + + %arrayidx4 = getelementptr inbounds float, float* %a, i32 2 + %i4 = load float, float* %arrayidx4, align 4 + %arrayidx5 = getelementptr inbounds float, float* %b, i32 2 + %i5 = load float, float* %arrayidx5, align 4 + %add3 = fadd float %i4, %i5 + %call3 = tail call float @llvm.ldexp.f32(float %add3, i32 %P) nounwind readnone + + %arrayidx6 = getelementptr inbounds float, float* %a, i32 3 + %i6 = load float, float* %arrayidx6, align 4 + %arrayidx7 = getelementptr inbounds float, float* %b, i32 3 + %i7 = load float, float* %arrayidx7, align 4 + %add4 = fadd float %i6, %i7 + %call4 = tail call float @llvm.ldexp.f32(float %add4, i32 %P) nounwind readnone + + store float %call1, float* %c, align 4 + %arrayidx8 = getelementptr inbounds float, float* %c, i32 1 + store float %call2, float* %arrayidx8, align 4 + %arrayidx9 = getelementptr inbounds float, float* %c, i32 2 + store float %call3, float* %arrayidx9, align 4 + %arrayidx10 = getelementptr inbounds float, float* %c, i32 3 + store float %call4, float* %arrayidx10, align 4 + ret void + +; CHECK-LABEL: @vec_ldexp_f32( +; CHECK: load <4 x float> +; CHECK: load <4 x float> +; CHECK: call <4 x float> @llvm.ldexp.v4f32 +; CHECK: store <4 x float> +; CHECK: ret +} + + +define void @vec_ldexp_f32_neg(float* %a, float* %b, float* %c, i32 %P, i32 %Q) { +entry: + %i0 = load float, float* %a, align 4 + %i1 = load float, float* %b, align 4 + %add1 = fadd float %i0, %i1 + %call1 = tail call float @llvm.ldexp.f32(float %add1, 
i32 %P) nounwind readnone + + %arrayidx2 = getelementptr inbounds float, float* %a, i32 1 + %i2 = load float, float* %arrayidx2, align 4 + %arrayidx3 = getelementptr inbounds float, float* %b, i32 1 + %i3 = load float, float* %arrayidx3, align 4 + %add2 = fadd float %i2, %i3 + %call2 = tail call float @llvm.ldexp.f32(float %add2, i32 %Q) nounwind readnone + + %arrayidx4 = getelementptr inbounds float, float* %a, i32 2 + %i4 = load float, float* %arrayidx4, align 4 + %arrayidx5 = getelementptr inbounds float, float* %b, i32 2 + %i5 = load float, float* %arrayidx5, align 4 + %add3 = fadd float %i4, %i5 + %call3 = tail call float @llvm.ldexp.f32(float %add3, i32 %P) nounwind readnone + + %arrayidx6 = getelementptr inbounds float, float* %a, i32 3 + %i6 = load float, float* %arrayidx6, align 4 + %arrayidx7 = getelementptr inbounds float, float* %b, i32 3 + %i7 = load float, float* %arrayidx7, align 4 + %add4 = fadd float %i6, %i7 + %call4 = tail call float @llvm.ldexp.f32(float %add4, i32 %Q) nounwind readnone + + store float %call1, float* %c, align 4 + %arrayidx8 = getelementptr inbounds float, float* %c, i32 1 + store float %call2, float* %arrayidx8, align 4 + %arrayidx9 = getelementptr inbounds float, float* %c, i32 2 + store float %call3, float* %arrayidx9, align 4 + %arrayidx10 = getelementptr inbounds float, float* %c, i32 3 + store float %call4, float* %arrayidx10, align 4 + ret void + +; CHECK-LABEL: @vec_ldexp_f32_neg( +; CHECK-NOT: call <4 x float> @llvm.ldexp.v4f32 +}