Index: clang/lib/CodeGen/CGBuiltin.cpp
===================================================================
--- clang/lib/CodeGen/CGBuiltin.cpp
+++ clang/lib/CodeGen/CGBuiltin.cpp
@@ -17049,8 +17049,13 @@
     return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_log_clamp);
   case AMDGPU::BI__builtin_amdgcn_ldexp:
   case AMDGPU::BI__builtin_amdgcn_ldexpf:
-  case AMDGPU::BI__builtin_amdgcn_ldexph:
-    return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_ldexp);
+  case AMDGPU::BI__builtin_amdgcn_ldexph: {
+    llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
+    llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
+    llvm::Function *F =
+        CGM.getIntrinsic(Intrinsic::ldexp, {Src0->getType(), Src1->getType()});
+    return Builder.CreateCall(F, {Src0, Src1});
+  }
   case AMDGPU::BI__builtin_amdgcn_frexp_mant:
   case AMDGPU::BI__builtin_amdgcn_frexp_mantf:
   case AMDGPU::BI__builtin_amdgcn_frexp_manth:
Index: clang/test/CodeGenOpenCL/builtins-amdgcn-vi.cl
===================================================================
--- clang/test/CodeGenOpenCL/builtins-amdgcn-vi.cl
+++ clang/test/CodeGenOpenCL/builtins-amdgcn-vi.cl
@@ -52,7 +52,7 @@
 }

 // CHECK-LABEL: @test_ldexp_f16
-// CHECK: call half @llvm.amdgcn.ldexp.f16
+// CHECK: call half @llvm.ldexp.f16.i32
 void test_ldexp_f16(global half* out, half a, int b)
 {
   *out = __builtin_amdgcn_ldexph(a, b);
Index: clang/test/CodeGenOpenCL/builtins-amdgcn.cl
===================================================================
--- clang/test/CodeGenOpenCL/builtins-amdgcn.cl
+++ clang/test/CodeGenOpenCL/builtins-amdgcn.cl
@@ -180,14 +180,14 @@
 }

 // CHECK-LABEL: @test_ldexp_f32
-// CHECK: call float @llvm.amdgcn.ldexp.f32
+// CHECK: call float @llvm.ldexp.f32.i32
 void test_ldexp_f32(global float* out, float a, int b)
 {
   *out = __builtin_amdgcn_ldexpf(a, b);
 }

 // CHECK-LABEL: @test_ldexp_f64
-// CHECK: call double @llvm.amdgcn.ldexp.f64
+// CHECK: call double @llvm.ldexp.f64.i32
 void test_ldexp_f64(global double* out, double a, int b)
 {
   *out = __builtin_amdgcn_ldexp(a, b);
Index: llvm/docs/LangRef.rst
===================================================================
--- llvm/docs/LangRef.rst
+++ llvm/docs/LangRef.rst
@@ -14713,6 +14713,47 @@
 When specified with the fast-math-flag 'afn', the result may be approximated
 using a less accurate calculation.

+'``llvm.ldexp.*``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+This is an overloaded intrinsic. You can use ``llvm.ldexp`` on any
+floating point or vector of floating point type. Not all targets support
+all types, however.
+
+::
+
+      declare float     @llvm.ldexp.f32.i32(float %Val, i32 %Exp)
+      declare double    @llvm.ldexp.f64.i32(double %Val, i32 %Exp)
+      declare x86_fp80  @llvm.ldexp.f80.i32(x86_fp80 %Val, i32 %Exp)
+      declare fp128     @llvm.ldexp.f128.i32(fp128 %Val, i32 %Exp)
+      declare ppc_fp128 @llvm.ldexp.ppcf128.i32(ppc_fp128 %Val, i32 %Exp)
+      declare <2 x float> @llvm.ldexp.v2f32.v2i32(<2 x float> %Val, <2 x i32> %Exp)
+
+Overview:
+"""""""""
+
+The '``llvm.ldexp.*``' intrinsics perform the ldexp function.
+
+Arguments:
+""""""""""
+
+The first argument and the return value are :ref:`floating-point
+<t_floating>` or :ref:`vector <t_vector>` of floating-point values of
+the same type. The second argument is an integer with the same number
+of elements.
+
+Semantics:
+""""""""""
+
+This function multiplies the first argument by 2 raised to the second
+argument's power. If the first argument is NaN or infinite, the same
+value is returned. If the result underflows, a zero with the same sign
+is returned. If the result overflows, the result is an infinity with
+the same sign.
+
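As a usage illustration (example IR, not part of the patch; the function and
value names are arbitrary)::

      ; %r = %x * 2^%n, per the semantics above
      define float @scale(float %x, i32 %n) {
        %r = call float @llvm.ldexp.f32.i32(float %x, i32 %n)
        ret float %r
      }

      ; vector form: the exponent operand has the same element count
      define <2 x float> @scale_v2(<2 x float> %x, <2 x i32> %n) {
        %r = call <2 x float> @llvm.ldexp.v2f32.v2i32(<2 x float> %x, <2 x i32> %n)
        ret <2 x float> %r
      }

      declare float @llvm.ldexp.f32.i32(float, i32)
      declare <2 x float> @llvm.ldexp.v2f32.v2i32(<2 x float>, <2 x i32>)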
 '``llvm.log.*``' Intrinsic
 ^^^^^^^^^^^^^^^^^^^^^^^^^^
@@ -24258,6 +24299,47 @@
 unspecified sequence of rounding operations.

+'``llvm.experimental.constrained.ldexp``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+      declare <ty>
+      @llvm.experimental.constrained.ldexp(<ty> <op1>, <ty2> <op2>,
+                                           metadata <rounding mode>,
+                                           metadata <exception behavior>)
+
+Overview:
+"""""""""
+
+The '``llvm.experimental.constrained.ldexp``' intrinsic performs the ldexp
+function.
+
+
+Arguments:
+""""""""""
+
+The first argument and the return value are :ref:`floating-point
+<t_floating>` or :ref:`vector <t_vector>` of floating-point values of
+the same type. The second argument is an integer with the same number
+of elements.
+
+
+The third and fourth arguments specify the rounding mode and exception
+behavior as described above.
+
+Semantics:
+""""""""""
+
+This function multiplies the first argument by 2 raised to the second
+argument's power. If the first argument is NaN or infinite, the same
+value is returned. If the result underflows, a zero with the same sign
+is returned. If the result overflows, the result is an infinity with
+the same sign.
+
+
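For reference, a constrained call passes the rounding mode and exception
behavior as metadata and must live in a ``strictfp`` function; a minimal
sketch (illustrative IR)::

      define float @scale_strict(float %x, i32 %n) #0 {
        %r = call float @llvm.experimental.constrained.ldexp.f32.i32(
                     float %x, i32 %n,
                     metadata !"round.dynamic", metadata !"fpexcept.strict")
        ret float %r
      }

      declare float @llvm.experimental.constrained.ldexp.f32.i32(float, i32, metadata, metadata)

      attributes #0 = { strictfp }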
 '``llvm.experimental.constrained.sin``' Intrinsic
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Index: llvm/docs/ReleaseNotes.rst
===================================================================
--- llvm/docs/ReleaseNotes.rst
+++ llvm/docs/ReleaseNotes.rst
@@ -56,6 +56,8 @@
 * The ``nofpclass`` attribute was introduced. This allows more optimizations
   around special floating point value comparisons.

+* Introduced new ``llvm.ldexp`` and ``llvm.experimental.constrained.ldexp`` intrinsics.
+
 * The constant expression variants of the following instructions have been
   removed:
Index: llvm/include/llvm/Analysis/TargetLibraryInfo.h
===================================================================
--- llvm/include/llvm/Analysis/TargetLibraryInfo.h
+++ llvm/include/llvm/Analysis/TargetLibraryInfo.h
@@ -378,6 +378,7 @@
   case LibFunc_trunc: case LibFunc_truncf: case LibFunc_truncl:
   case LibFunc_log2: case LibFunc_log2f: case LibFunc_log2l:
   case LibFunc_exp2: case LibFunc_exp2f: case LibFunc_exp2l:
+  case LibFunc_ldexp: case LibFunc_ldexpf: case LibFunc_ldexpl:
   case LibFunc_memcpy: case LibFunc_memset: case LibFunc_memmove:
   case LibFunc_memcmp: case LibFunc_bcmp: case LibFunc_strcmp:
   case LibFunc_strcpy: case LibFunc_stpcpy: case LibFunc_strlen:
Index: llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
===================================================================
--- llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
+++ llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
@@ -357,6 +357,7 @@
   LegalizeResult narrowScalarCTLZ(MachineInstr &MI, unsigned TypeIdx, LLT Ty);
   LegalizeResult narrowScalarCTTZ(MachineInstr &MI, unsigned TypeIdx, LLT Ty);
   LegalizeResult narrowScalarCTPOP(MachineInstr &MI, unsigned TypeIdx, LLT Ty);
+  LegalizeResult narrowScalarFLDEXP(MachineInstr &MI, unsigned TypeIdx, LLT Ty);

   /// Perform Bitcast legalize action on G_EXTRACT_VECTOR_ELT.
   LegalizeResult bitcastExtractVectorElt(MachineInstr &MI, unsigned TypeIdx,
Index: llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h
===================================================================
--- llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h
+++ llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h
@@ -1825,6 +1825,13 @@
     return buildInstr(TargetOpcode::G_FPOW, {Dst}, {Src0, Src1}, Flags);
   }

+  /// Build and insert \p Dst = G_FLDEXP \p Src0, \p Src1
+  MachineInstrBuilder
+  buildFLdexp(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1,
+              std::optional<unsigned> Flags = std::nullopt) {
+    return buildInstr(TargetOpcode::G_FLDEXP, {Dst}, {Src0, Src1}, Flags);
+  }
+
   /// Build and insert \p Res = G_FCOPYSIGN \p Op0, \p Op1
   MachineInstrBuilder buildFCopysign(const DstOp &Dst, const SrcOp &Src0,
                                      const SrcOp &Src1) {
Index: llvm/include/llvm/CodeGen/ISDOpcodes.h
===================================================================
--- llvm/include/llvm/CodeGen/ISDOpcodes.h
+++ llvm/include/llvm/CodeGen/ISDOpcodes.h
@@ -411,6 +411,7 @@
   STRICT_FSQRT,
   STRICT_FPOW,
   STRICT_FPOWI,
+  STRICT_FLDEXP,
   STRICT_FSIN,
   STRICT_FCOS,
   STRICT_FEXP,
@@ -926,8 +927,10 @@
   FCBRT,
   FSIN,
   FCOS,
-  FPOWI,
   FPOW,
+  FPOWI,
+  /// FLDEXP - ldexp, inspired by libm (op0 * 2**op1).
+  FLDEXP,
   FLOG,
   FLOG2,
   FLOG10,
Index: llvm/include/llvm/CodeGen/RuntimeLibcalls.h
===================================================================
--- llvm/include/llvm/CodeGen/RuntimeLibcalls.h
+++ llvm/include/llvm/CodeGen/RuntimeLibcalls.h
@@ -70,6 +70,10 @@
   /// UNKNOWN_LIBCALL if there is none.
   Libcall getPOWI(EVT RetVT);

+  /// getLDEXP - Return the LDEXP_* value for the given types, or
+  /// UNKNOWN_LIBCALL if there is none.
+  Libcall getLDEXP(EVT RetVT);
+
   /// Return the SYNC_FETCH_AND_* value for the given opcode and type, or
   /// UNKNOWN_LIBCALL if there is none.
   Libcall getSYNC(unsigned Opc, MVT VT);
Index: llvm/include/llvm/IR/ConstrainedOps.def
===================================================================
--- llvm/include/llvm/IR/ConstrainedOps.def
+++ llvm/include/llvm/IR/ConstrainedOps.def
@@ -89,6 +89,7 @@
 DAG_FUNCTION(nearbyint, 1, 1, experimental_constrained_nearbyint, FNEARBYINT)
 DAG_FUNCTION(pow, 2, 1, experimental_constrained_pow, FPOW)
 DAG_FUNCTION(powi, 2, 1, experimental_constrained_powi, FPOWI)
+DAG_FUNCTION(ldexp, 2, 1, experimental_constrained_ldexp, FLDEXP)
 DAG_FUNCTION(rint, 1, 1, experimental_constrained_rint, FRINT)
 DAG_FUNCTION(round, 1, 0, experimental_constrained_round, FROUND)
 DAG_FUNCTION(roundeven, 1, 0, experimental_constrained_roundeven, FROUNDEVEN)
Index: llvm/include/llvm/IR/Intrinsics.td
===================================================================
--- llvm/include/llvm/IR/Intrinsics.td
+++ llvm/include/llvm/IR/Intrinsics.td
@@ -1037,6 +1037,10 @@
   def int_llround : DefaultAttrsIntrinsic<[llvm_anyint_ty], [llvm_anyfloat_ty]>;
   def int_lrint : DefaultAttrsIntrinsic<[llvm_anyint_ty], [llvm_anyfloat_ty]>;
   def int_llrint : DefaultAttrsIntrinsic<[llvm_anyint_ty], [llvm_anyfloat_ty]>;
+
+  // TODO: int operand should be constrained to same number of elements as the result.
+ def int_ldexp : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, + llvm_anyint_ty]>; } def int_minnum : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], @@ -1165,6 +1169,11 @@ llvm_i32_ty, llvm_metadata_ty, llvm_metadata_ty ]>; + def int_experimental_constrained_ldexp : DefaultAttrsIntrinsic<[ llvm_anyfloat_ty ], + [ LLVMMatchType<0>, + llvm_anyint_ty, + llvm_metadata_ty, + llvm_metadata_ty ]>; def int_experimental_constrained_sin : DefaultAttrsIntrinsic<[ llvm_anyfloat_ty ], [ LLVMMatchType<0>, llvm_metadata_ty, Index: llvm/include/llvm/IR/RuntimeLibcalls.def =================================================================== --- llvm/include/llvm/IR/RuntimeLibcalls.def +++ llvm/include/llvm/IR/RuntimeLibcalls.def @@ -279,6 +279,11 @@ HANDLE_LIBCALL(LLRINT_F80, "llrintl") HANDLE_LIBCALL(LLRINT_F128, "llrintl") HANDLE_LIBCALL(LLRINT_PPCF128, "llrintl") +HANDLE_LIBCALL(LDEXP_F32, "ldexpf") +HANDLE_LIBCALL(LDEXP_F64, "ldexp") +HANDLE_LIBCALL(LDEXP_F80, "ldexpl") +HANDLE_LIBCALL(LDEXP_F128, "ldexpl") +HANDLE_LIBCALL(LDEXP_PPCF128, "ldexpl") // Conversion HANDLE_LIBCALL(FPEXT_F32_PPCF128, "__gcc_stoq") Index: llvm/include/llvm/Support/TargetOpcodes.def =================================================================== --- llvm/include/llvm/Support/TargetOpcodes.def +++ llvm/include/llvm/Support/TargetOpcodes.def @@ -613,6 +613,9 @@ /// Floating point base-10 logarithm of a value. HANDLE_TARGET_OPCODE(G_FLOG10) +/// Floating point x * 2^n +HANDLE_TARGET_OPCODE(G_FLDEXP) + /// Generic FP negation. HANDLE_TARGET_OPCODE(G_FNEG) @@ -762,6 +765,7 @@ HANDLE_TARGET_OPCODE(G_STRICT_FREM) HANDLE_TARGET_OPCODE(G_STRICT_FMA) HANDLE_TARGET_OPCODE(G_STRICT_FSQRT) +HANDLE_TARGET_OPCODE(G_STRICT_FLDEXP) /// read_register intrinsic HANDLE_TARGET_OPCODE(G_READ_REGISTER) Index: llvm/include/llvm/Target/GenericOpcodes.td =================================================================== --- llvm/include/llvm/Target/GenericOpcodes.td +++ llvm/include/llvm/Target/GenericOpcodes.td @@ -923,6 +923,13 @@ let hasSideEffects = false; } +// Floating point x * 2^n +def G_FLDEXP : GenericInstruction { + let OutOperandList = (outs type0:$dst); + let InOperandList = (ins type0:$src0, type1:$src1); + let hasSideEffects = false; +} + // Floating point ceiling of a value. def G_FCEIL : GenericInstruction { let OutOperandList = (outs type0:$dst); @@ -1384,6 +1391,7 @@ def G_STRICT_FREM : ConstrainedInstruction; def G_STRICT_FMA : ConstrainedInstruction; def G_STRICT_FSQRT : ConstrainedInstruction; +def G_STRICT_FLDEXP : ConstrainedInstruction; //------------------------------------------------------------------------------ // Memory intrinsics Index: llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td =================================================================== --- llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td +++ llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td @@ -103,6 +103,7 @@ def : GINodeEquiv; def : GINodeEquiv; def : GINodeEquiv; +def : GINodeEquiv; def : GINodeEquiv; def : GINodeEquiv; def : GINodeEquiv; @@ -158,6 +159,7 @@ def : GINodeEquiv; def : GINodeEquiv; def : GINodeEquiv; +def : GINodeEquiv; // Broadly speaking G_LOAD is equivalent to ISD::LOAD but there are some // complications that tablegen must take care of. 
For example, Predicates such Index: llvm/include/llvm/Target/TargetSelectionDAG.td =================================================================== --- llvm/include/llvm/Target/TargetSelectionDAG.td +++ llvm/include/llvm/Target/TargetSelectionDAG.td @@ -173,6 +173,9 @@ def SDTFPToIntSatOp : SDTypeProfile<1, 2, [ // fp_to_[su]int_sat SDTCisInt<0>, SDTCisFP<1>, SDTCisSameNumEltsAs<0, 1>, SDTCisVT<2, OtherVT> ]>; +def SDTFPExpOp : SDTypeProfile<1, 2, [ // ldexp + SDTCisSameAs<0, 1>, SDTCisFP<0>, SDTCisInt<2> +]>; def SDTExtInreg : SDTypeProfile<1, 2, [ // sext_inreg SDTCisSameAs<0, 1>, SDTCisInt<0>, SDTCisVT<2, OtherVT>, SDTCisVTSmallerThanOp<2, 1> @@ -499,6 +502,7 @@ def fexp2 : SDNode<"ISD::FEXP2" , SDTFPUnaryOp>; def fpow : SDNode<"ISD::FPOW" , SDTFPBinOp>; def flog2 : SDNode<"ISD::FLOG2" , SDTFPUnaryOp>; +def fldexp : SDNode<"ISD::FLDEXP" , SDTFPExpOp>; def frint : SDNode<"ISD::FRINT" , SDTFPUnaryOp>; def ftrunc : SDNode<"ISD::FTRUNC" , SDTFPUnaryOp>; def fceil : SDNode<"ISD::FCEIL" , SDTFPUnaryOp>; @@ -549,6 +553,8 @@ SDTFPUnaryOp, [SDNPHasChain]>; def strict_fpow : SDNode<"ISD::STRICT_FPOW", SDTFPBinOp, [SDNPHasChain]>; +def strict_fldexp : SDNode<"ISD::STRICT_FLDEXP", + SDTFPExpOp, [SDNPHasChain]>; def strict_flog2 : SDNode<"ISD::STRICT_FLOG2", SDTFPUnaryOp, [SDNPHasChain]>; def strict_frint : SDNode<"ISD::STRICT_FRINT", @@ -1449,6 +1455,9 @@ def any_fpow : PatFrags<(ops node:$lhs, node:$rhs), [(strict_fpow node:$lhs, node:$rhs), (fpow node:$lhs, node:$rhs)]>; +def any_fldexp : PatFrags<(ops node:$lhs, node:$rhs), + [(strict_fldexp node:$lhs, node:$rhs), + (fldexp node:$lhs, node:$rhs)]>; def any_flog2 : PatFrags<(ops node:$src), [(strict_flog2 node:$src), (flog2 node:$src)]>; Index: llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp =================================================================== --- llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp +++ llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp @@ -1759,6 +1759,8 @@ return TargetOpcode::G_FLOG2; case Intrinsic::log10: return TargetOpcode::G_FLOG10; + case Intrinsic::ldexp: + return TargetOpcode::G_FLDEXP; case Intrinsic::nearbyint: return TargetOpcode::G_FNEARBYINT; case Intrinsic::pow: @@ -1851,6 +1853,8 @@ return TargetOpcode::G_STRICT_FMA; case Intrinsic::experimental_constrained_sqrt: return TargetOpcode::G_STRICT_FSQRT; + case Intrinsic::experimental_constrained_ldexp: + return TargetOpcode::G_STRICT_FLDEXP; default: return 0; } Index: llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp =================================================================== --- llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -542,6 +542,8 @@ RTLIBCASE(LOG_F); case TargetOpcode::G_FLOG2: RTLIBCASE(LOG2_F); + case TargetOpcode::G_FLDEXP: + RTLIBCASE(LDEXP_F); case TargetOpcode::G_FCEIL: RTLIBCASE(CEIL_F); case TargetOpcode::G_FFLOOR: @@ -826,6 +828,7 @@ case TargetOpcode::G_FLOG10: case TargetOpcode::G_FLOG: case TargetOpcode::G_FLOG2: + case TargetOpcode::G_FLDEXP: case TargetOpcode::G_FEXP: case TargetOpcode::G_FEXP2: case TargetOpcode::G_FCEIL: @@ -1413,6 +1416,9 @@ narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_FPEXT); Observer.changedInstr(MI); return Legalized; + case TargetOpcode::G_FLDEXP: + case TargetOpcode::G_STRICT_FLDEXP: + return narrowScalarFLDEXP(MI, TypeIdx, NarrowTy); } } @@ -2553,14 +2559,30 @@ widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC); Observer.changedInstr(MI); return Legalized; - case TargetOpcode::G_FPOWI: { - if (TypeIdx != 0) - return UnableToLegalize; - 
Observer.changingInstr(MI); - widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT); - widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC); - Observer.changedInstr(MI); - return Legalized; + case TargetOpcode::G_FPOWI: + case TargetOpcode::G_FLDEXP: + case TargetOpcode::G_STRICT_FLDEXP: { + if (TypeIdx == 0) { + if (MI.getOpcode() == TargetOpcode::G_STRICT_FLDEXP) + return UnableToLegalize; + + Observer.changingInstr(MI); + widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT); + widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC); + Observer.changedInstr(MI); + return Legalized; + } + + if (TypeIdx == 1) { + // For some reason SelectionDAG tries to promote to a libcall without + // actually changing the integer type for promotion. + Observer.changingInstr(MI); + widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT); + Observer.changedInstr(MI); + return Legalized; + } + + return UnableToLegalize; } case TargetOpcode::G_INTTOPTR: if (TypeIdx != 1) @@ -4136,6 +4158,7 @@ case G_FLOG: case G_FLOG2: case G_FLOG10: + case G_FLDEXP: case G_FNEARBYINT: case G_FCEIL: case G_FFLOOR: @@ -4211,6 +4234,7 @@ case G_STRICT_FSUB: case G_STRICT_FMUL: case G_STRICT_FMA: + case G_STRICT_FLDEXP: return fewerElementsVectorMultiEltType(GMI, NumElts); case G_ICMP: case G_FCMP: @@ -5592,6 +5616,31 @@ return UnableToLegalize; } +LegalizerHelper::LegalizeResult +LegalizerHelper::narrowScalarFLDEXP(MachineInstr &MI, unsigned TypeIdx, + LLT NarrowTy) { + if (TypeIdx != 1) + return UnableToLegalize; + + MachineIRBuilder &B = MIRBuilder; + Register ExpReg = MI.getOperand(2).getReg(); + LLT ExpTy = MRI.getType(ExpReg); + + unsigned ClampSize = NarrowTy.getScalarSizeInBits(); + + // Clamp the exponent to the range of the target type. + auto MinExp = B.buildConstant(ExpTy, minIntN(ClampSize)); + auto ClampMin = B.buildSMax(ExpTy, ExpReg, MinExp); + auto MaxExp = B.buildConstant(ExpTy, maxIntN(ClampSize)); + auto Clamp = B.buildSMin(ExpTy, ClampMin, MaxExp); + + auto Trunc = B.buildTrunc(NarrowTy, Clamp); + Observer.changingInstr(MI); + MI.getOperand(2).setReg(Trunc.getReg(0)); + Observer.changedInstr(MI); + return Legalized; +} + LegalizerHelper::LegalizeResult LegalizerHelper::lowerBitCount(MachineInstr &MI) { unsigned Opc = MI.getOpcode(); Index: llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp =================================================================== --- llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -172,6 +172,8 @@ SDValue ExpandFCOPYSIGN(SDNode *Node) const; SDValue ExpandFABS(SDNode *Node) const; SDValue ExpandFNEG(SDNode *Node) const; + SDValue expandLdexp(SDNode *Node) const; + SDValue ExpandLegalINT_TO_FP(SDNode *Node, SDValue &Chain); void PromoteLegalINT_TO_FP(SDNode *N, const SDLoc &dl, SmallVectorImpl &Results); @@ -2309,6 +2311,118 @@ DAG.getLoad(RetVT, dl, CallInfo.second, CosPtr, MachinePointerInfo())); } +SDValue SelectionDAGLegalize::expandLdexp(SDNode *Node) const { + SDLoc dl(Node); + EVT VT = Node->getValueType(0); + SDValue X = Node->getOperand(0); + SDValue N = Node->getOperand(1); + EVT ExpVT = N.getValueType(); + EVT AsIntVT = VT.changeTypeToInteger(); + if (AsIntVT == EVT()) // TODO: How to handle f80? 
+ return SDValue(); + + if (Node->getOpcode() == ISD::STRICT_FLDEXP) // TODO + return SDValue(); + + SDNodeFlags NSW; + NSW.setNoSignedWrap(true); + SDNodeFlags NUW_NSW; + NUW_NSW.setNoUnsignedWrap(true); + NUW_NSW.setNoSignedWrap(true); + + EVT SetCCVT = + TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), ExpVT); + const fltSemantics &FltSem = SelectionDAG::EVTToAPFloatSemantics(VT); + + const APFloat::ExponentType MaxExpVal = APFloat::semanticsMaxExponent(FltSem); + const APFloat::ExponentType MinExpVal = APFloat::semanticsMinExponent(FltSem); + const int Precision = APFloat::semanticsPrecision(FltSem); + + const SDValue MaxExp = DAG.getConstant(MaxExpVal, dl, ExpVT); + const SDValue MinExp = DAG.getConstant(MinExpVal, dl, ExpVT); + + const SDValue DoubleMaxExp = DAG.getConstant(2 * MaxExpVal, dl, ExpVT); + + const APFloat One(FltSem, "1.0"); + APFloat ScaleUpK = scalbn(One, MaxExpVal, APFloat::rmNearestTiesToEven); + + // Offset by precision to avoid denormal range. + APFloat ScaleDownK = + scalbn(One, MinExpVal + Precision, APFloat::rmNearestTiesToEven); + + // TODO: Should really introduce control flow and use a block for the > + // MaxExp, < MinExp cases + + // First, handle exponents Exp > MaxExp and scale down. + SDValue NGtMaxExp = DAG.getSetCC(dl, SetCCVT, N, MaxExp, ISD::SETGT); + + SDValue DecN0 = DAG.getNode(ISD::SUB, dl, ExpVT, N, MaxExp, NSW); + SDValue ClampMaxVal = DAG.getConstant(3 * MaxExpVal, dl, ExpVT); + SDValue ClampN_Big = DAG.getNode(ISD::SMIN, dl, ExpVT, N, ClampMaxVal); + SDValue DecN1 = + DAG.getNode(ISD::SUB, dl, ExpVT, ClampN_Big, DoubleMaxExp, NSW); + + SDValue ScaleUpTwice = + DAG.getSetCC(dl, SetCCVT, N, DoubleMaxExp, ISD::SETUGT); + + const SDValue ScaleUpVal = DAG.getConstantFP(ScaleUpK, dl, VT); + SDValue ScaleUp0 = DAG.getNode(ISD::FMUL, dl, VT, X, ScaleUpVal); + SDValue ScaleUp1 = DAG.getNode(ISD::FMUL, dl, VT, ScaleUp0, ScaleUpVal); + + SDValue SelectN_Big = + DAG.getNode(ISD::SELECT, dl, ExpVT, ScaleUpTwice, DecN1, DecN0); + SDValue SelectX_Big = + DAG.getNode(ISD::SELECT, dl, VT, ScaleUpTwice, ScaleUp1, ScaleUp0); + + // Now handle exponents Exp < MinExp + SDValue NLtMinExp = DAG.getSetCC(dl, SetCCVT, N, MinExp, ISD::SETLT); + + SDValue Increment0 = DAG.getConstant(-(MinExpVal + Precision), dl, ExpVT); + SDValue Increment1 = DAG.getConstant(-2 * (MinExpVal + Precision), dl, ExpVT); + + SDValue IncN0 = DAG.getNode(ISD::ADD, dl, ExpVT, N, Increment0, NUW_NSW); + + SDValue ClampMinVal = + DAG.getConstant(3 * MinExpVal + 2 * Precision, dl, ExpVT); + SDValue ClampN_Small = DAG.getNode(ISD::SMAX, dl, ExpVT, N, ClampMinVal); + SDValue IncN1 = + DAG.getNode(ISD::ADD, dl, ExpVT, ClampN_Small, Increment1, NSW); + + const SDValue ScaleDownVal = DAG.getConstantFP(ScaleDownK, dl, VT); + SDValue ScaleDown0 = DAG.getNode(ISD::FMUL, dl, VT, X, ScaleDownVal); + SDValue ScaleDown1 = DAG.getNode(ISD::FMUL, dl, VT, ScaleDown0, ScaleDownVal); + + SDValue ScaleDownTwice = DAG.getSetCC( + dl, SetCCVT, N, DAG.getConstant(2 * MinExpVal + Precision, dl, ExpVT), + ISD::SETULT); + + SDValue SelectN_Small = + DAG.getNode(ISD::SELECT, dl, ExpVT, ScaleDownTwice, IncN1, IncN0); + SDValue SelectX_Small = + DAG.getNode(ISD::SELECT, dl, VT, ScaleDownTwice, ScaleDown1, ScaleDown0); + + // Now combine the two out of range exponent handling cases with the base + // case. 
+ SDValue NewX = DAG.getNode( + ISD::SELECT, dl, VT, NGtMaxExp, SelectX_Big, + DAG.getNode(ISD::SELECT, dl, VT, NLtMinExp, SelectX_Small, X)); + + SDValue NewN = DAG.getNode( + ISD::SELECT, dl, ExpVT, NGtMaxExp, SelectN_Big, + DAG.getNode(ISD::SELECT, dl, ExpVT, NLtMinExp, SelectN_Small, N)); + + SDValue BiasedN = DAG.getNode(ISD::ADD, dl, ExpVT, NewN, MaxExp, NSW); + + SDValue ExponentShiftAmt = + DAG.getShiftAmountConstant(Precision - 1, ExpVT, dl); + SDValue CastExpToValTy = DAG.getZExtOrTrunc(BiasedN, dl, AsIntVT); + + SDValue AsInt = DAG.getNode(ISD::SHL, dl, AsIntVT, CastExpToValTy, + ExponentShiftAmt, NUW_NSW); + SDValue AsFP = DAG.getNode(ISD::BITCAST, dl, VT, AsInt); + return DAG.getNode(ISD::FMUL, dl, VT, NewX, AsFP); +} + /// This function is responsible for legalizing a /// INT_TO_FP operation of the specified operand when the target requests that /// we expand it. At this point, we know that the result and operand types are @@ -3246,6 +3360,23 @@ } break; } + case ISD::FLDEXP: + case ISD::STRICT_FLDEXP: { + EVT VT = Node->getValueType(0); + RTLIB::Libcall LC = RTLIB::getLDEXP(VT); + // Use the LibCall instead, it is very likely faster + // FIXME: Use separate LibCall action. + if (TLI.getLibcallName(LC)) + break; + + if (SDValue Expanded = expandLdexp(Node)) { + Results.push_back(Expanded); + if (Node->getOpcode() == ISD::STRICT_FLDEXP) + Results.push_back(Expanded.getValue(1)); + } + + break; + } case ISD::FMAD: llvm_unreachable("Illegal fmad should never be formed"); @@ -4138,6 +4269,11 @@ RTLIB::ROUNDEVEN_F128, RTLIB::ROUNDEVEN_PPCF128, Results); break; + case ISD::FLDEXP: + case ISD::STRICT_FLDEXP: + ExpandFPLibCall(Node, RTLIB::LDEXP_F32, RTLIB::LDEXP_F64, RTLIB::LDEXP_F80, + RTLIB::LDEXP_F128, RTLIB::LDEXP_PPCF128, Results); + break; case ISD::FPOWI: case ISD::STRICT_FPOWI: { RTLIB::Libcall LC = RTLIB::getPOWI(Node->getSimpleValueType(0)); @@ -4844,6 +4980,7 @@ Results.push_back(Tmp4.getValue(1)); break; case ISD::FCOPYSIGN: + case ISD::FLDEXP: case ISD::FPOWI: { Tmp1 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(0)); Tmp2 = Node->getOperand(1); Index: llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp =================================================================== --- llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp +++ llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp @@ -110,7 +110,9 @@ case ISD::STRICT_FPOW: case ISD::FPOW: R = SoftenFloatRes_FPOW(N); break; case ISD::STRICT_FPOWI: - case ISD::FPOWI: R = SoftenFloatRes_FPOWI(N); break; + case ISD::FPOWI: + case ISD::FLDEXP: + case ISD::STRICT_FLDEXP: R = SoftenFloatRes_ExpOp(N); break; case ISD::STRICT_FREM: case ISD::FREM: R = SoftenFloatRes_FREM(N); break; case ISD::STRICT_FRINT: @@ -582,13 +584,17 @@ RTLIB::POW_PPCF128)); } -SDValue DAGTypeLegalizer::SoftenFloatRes_FPOWI(SDNode *N) { +SDValue DAGTypeLegalizer::SoftenFloatRes_ExpOp(SDNode *N) { bool IsStrict = N->isStrictFPOpcode(); unsigned Offset = IsStrict ? 1 : 0; assert((N->getOperand(1 + Offset).getValueType() == MVT::i16 || N->getOperand(1 + Offset).getValueType() == MVT::i32) && "Unsupported power type!"); - RTLIB::Libcall LC = RTLIB::getPOWI(N->getValueType(0)); + bool IsPowI = + N->getOpcode() == ISD::FPOWI || N->getOpcode() == ISD::STRICT_FPOWI; + + RTLIB::Libcall LC = IsPowI ? RTLIB::getPOWI(N->getValueType(0)) + : RTLIB::getLDEXP(N->getValueType(0)); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected fpowi."); if (!TLI.getLibcallName(LC)) { // Some targets don't have a powi libcall; use pow instead. 
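The scaling trick at the heart of expandLdexp above can be seen in
isolation: once the exponent has been clamped into range, 2^N is
materialized by shifting the biased exponent into the exponent field of the
float and multiplying. A sketch for f32 (bias 127, Precision - 1 = 23
mantissa bits), assuming N is already in the normal range (illustrative IR,
not from the patch)::

      define float @exp2_in_range(i32 %n) {
        %biased = add nsw i32 %n, 127       ; MaxExpVal serves as the bias
        %bits = shl nuw nsw i32 %biased, 23 ; place it in the exponent field
        %f = bitcast i32 %bits to float     ; %f == 2.0^%n
        ret float %f
      }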
@@ -1253,6 +1259,8 @@ case ISD::FPOW: ExpandFloatRes_FPOW(N, Lo, Hi); break; case ISD::STRICT_FPOWI: case ISD::FPOWI: ExpandFloatRes_FPOWI(N, Lo, Hi); break; + case ISD::FLDEXP: + case ISD::STRICT_FLDEXP: ExpandFloatRes_FLDEXP(N, Lo, Hi); break; case ISD::FREEZE: ExpandFloatRes_FREEZE(N, Lo, Hi); break; case ISD::STRICT_FRINT: case ISD::FRINT: ExpandFloatRes_FRINT(N, Lo, Hi); break; @@ -1548,6 +1556,11 @@ ExpandFloatRes_Binary(N, RTLIB::getPOWI(N->getValueType(0)), Lo, Hi); } +void DAGTypeLegalizer::ExpandFloatRes_FLDEXP(SDNode *N, SDValue &Lo, + SDValue &Hi) { + ExpandFloatRes_Binary(N, RTLIB::getLDEXP(N->getValueType(0)), Lo, Hi); +} + void DAGTypeLegalizer::ExpandFloatRes_FREEZE(SDNode *N, SDValue &Lo, SDValue &Hi) { assert(N->getValueType(0) == MVT::ppcf128 && @@ -2289,7 +2302,8 @@ case ISD::FMA: // FMA is same as FMAD case ISD::FMAD: R = PromoteFloatRes_FMAD(N); break; - case ISD::FPOWI: R = PromoteFloatRes_FPOWI(N); break; + case ISD::FPOWI: + case ISD::FLDEXP: R = PromoteFloatRes_ExpOp(N); break; case ISD::FP_ROUND: R = PromoteFloatRes_FP_ROUND(N); break; case ISD::LOAD: R = PromoteFloatRes_LOAD(N); break; @@ -2458,7 +2472,7 @@ } // Promote the Float (first) operand and retain the Integer (second) operand -SDValue DAGTypeLegalizer::PromoteFloatRes_FPOWI(SDNode *N) { +SDValue DAGTypeLegalizer::PromoteFloatRes_ExpOp(SDNode *N) { EVT VT = N->getValueType(0); EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); SDValue Op0 = GetPromotedFloat(N->getOperand(0)); @@ -2655,7 +2669,8 @@ case ISD::FMA: // FMA is same as FMAD case ISD::FMAD: R = SoftPromoteHalfRes_FMAD(N); break; - case ISD::FPOWI: R = SoftPromoteHalfRes_FPOWI(N); break; + case ISD::FPOWI: + case ISD::FLDEXP: R = SoftPromoteHalfRes_ExpOp(N); break; case ISD::LOAD: R = SoftPromoteHalfRes_LOAD(N); break; case ISD::SELECT: R = SoftPromoteHalfRes_SELECT(N); break; @@ -2767,7 +2782,7 @@ return DAG.getNode(GetPromotionOpcode(NVT, OVT), dl, MVT::i16, Res); } -SDValue DAGTypeLegalizer::SoftPromoteHalfRes_FPOWI(SDNode *N) { +SDValue DAGTypeLegalizer::SoftPromoteHalfRes_ExpOp(SDNode *N) { EVT OVT = N->getValueType(0); EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), OVT); SDValue Op0 = GetSoftPromotedHalf(N->getOperand(0)); Index: llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp =================================================================== --- llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -1714,10 +1714,10 @@ case ISD::SDIVFIXSAT: case ISD::UDIVFIX: case ISD::UDIVFIXSAT: Res = PromoteIntOp_FIX(N); break; - case ISD::FPOWI: - case ISD::STRICT_FPOWI: Res = PromoteIntOp_FPOWI(N); break; - + case ISD::STRICT_FPOWI: + case ISD::FLDEXP: + case ISD::STRICT_FLDEXP: Res = PromoteIntOp_ExpOp(N); break; case ISD::VECREDUCE_ADD: case ISD::VECREDUCE_MUL: case ISD::VECREDUCE_AND: @@ -2201,26 +2201,29 @@ 0); } -SDValue DAGTypeLegalizer::PromoteIntOp_FPOWI(SDNode *N) { +SDValue DAGTypeLegalizer::PromoteIntOp_ExpOp(SDNode *N) { bool IsStrict = N->isStrictFPOpcode(); SDValue Chain = IsStrict ? N->getOperand(0) : SDValue(); - // The integer operand is the last operand in FPOWI (so the result and - // floating point operand is already type legalized). + bool IsPowI = + N->getOpcode() == ISD::FPOWI || N->getOpcode() == ISD::STRICT_FPOWI; + + // The integer operand is the last operand in FPOWI (or FLDEXP) (so the result + // and floating point operand is already type legalized). + RTLIB::Libcall LC = IsPowI ? 
RTLIB::getPOWI(N->getValueType(0)) + : RTLIB::getLDEXP(N->getValueType(0)); + + if (LC == RTLIB::UNKNOWN_LIBCALL || !TLI.getLibcallName(LC)) { + SDValue Op = SExtPromotedInteger(N->getOperand(1)); + return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0), Op), 0); + } // We can't just promote the exponent type in FPOWI, since we want to lower // the node to a libcall and we if we promote to a type larger than // sizeof(int) the libcall might not be according to the targets ABI. Instead // we rewrite to a libcall here directly, letting makeLibCall handle promotion // if the target accepts it according to shouldSignExtendTypeInLibCall. - RTLIB::Libcall LC = RTLIB::getPOWI(N->getValueType(0)); - assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected fpowi."); - if (!TLI.getLibcallName(LC)) { - // Some targets don't have a powi libcall; use pow instead. - // FIXME: Implement this if some target needs it. - DAG.getContext()->emitError("Don't know how to promote fpowi to fpow"); - return DAG.getUNDEF(N->getValueType(0)); - } + unsigned OpOffset = IsStrict ? 1 : 0; // The exponent should fit in a sizeof(int) type for the libcall to be valid. assert(DAG.getLibInfo().getIntSize() == Index: llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h =================================================================== --- llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -400,7 +400,7 @@ SDValue PromoteIntOp_FRAMERETURNADDR(SDNode *N); SDValue PromoteIntOp_PREFETCH(SDNode *N, unsigned OpNo); SDValue PromoteIntOp_FIX(SDNode *N); - SDValue PromoteIntOp_FPOWI(SDNode *N); + SDValue PromoteIntOp_ExpOp(SDNode *N); SDValue PromoteIntOp_VECREDUCE(SDNode *N); SDValue PromoteIntOp_VP_REDUCE(SDNode *N, unsigned OpNo); SDValue PromoteIntOp_SET_ROUNDING(SDNode *N); @@ -562,7 +562,7 @@ SDValue SoftenFloatRes_FP16_TO_FP(SDNode *N); SDValue SoftenFloatRes_FP_ROUND(SDNode *N); SDValue SoftenFloatRes_FPOW(SDNode *N); - SDValue SoftenFloatRes_FPOWI(SDNode *N); + SDValue SoftenFloatRes_ExpOp(SDNode *N); SDValue SoftenFloatRes_FREEZE(SDNode *N); SDValue SoftenFloatRes_FREM(SDNode *N); SDValue SoftenFloatRes_FRINT(SDNode *N); @@ -640,6 +640,7 @@ void ExpandFloatRes_FP_EXTEND (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandFloatRes_FPOW (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandFloatRes_FPOWI (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FLDEXP (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandFloatRes_FREEZE (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandFloatRes_FREM (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandFloatRes_FRINT (SDNode *N, SDValue &Lo, SDValue &Hi); @@ -689,7 +690,7 @@ SDValue PromoteFloatRes_EXTRACT_VECTOR_ELT(SDNode *N); SDValue PromoteFloatRes_FCOPYSIGN(SDNode *N); SDValue PromoteFloatRes_FMAD(SDNode *N); - SDValue PromoteFloatRes_FPOWI(SDNode *N); + SDValue PromoteFloatRes_ExpOp(SDNode *N); SDValue PromoteFloatRes_FP_ROUND(SDNode *N); SDValue PromoteFloatRes_LOAD(SDNode *N); SDValue PromoteFloatRes_SELECT(SDNode *N); @@ -730,7 +731,7 @@ SDValue SoftPromoteHalfRes_EXTRACT_VECTOR_ELT(SDNode *N); SDValue SoftPromoteHalfRes_FCOPYSIGN(SDNode *N); SDValue SoftPromoteHalfRes_FMAD(SDNode *N); - SDValue SoftPromoteHalfRes_FPOWI(SDNode *N); + SDValue SoftPromoteHalfRes_ExpOp(SDNode *N); SDValue SoftPromoteHalfRes_FP_ROUND(SDNode *N); SDValue SoftPromoteHalfRes_LOAD(SDNode *N); SDValue SoftPromoteHalfRes_SELECT(SDNode *N); @@ -783,7 +784,7 @@ SDValue ScalarizeVecRes_BUILD_VECTOR(SDNode *N); SDValue ScalarizeVecRes_EXTRACT_SUBVECTOR(SDNode *N); 
SDValue ScalarizeVecRes_FP_ROUND(SDNode *N); - SDValue ScalarizeVecRes_FPOWI(SDNode *N); + SDValue ScalarizeVecRes_ExpOp(SDNode *N); SDValue ScalarizeVecRes_INSERT_VECTOR_ELT(SDNode *N); SDValue ScalarizeVecRes_LOAD(LoadSDNode *N); SDValue ScalarizeVecRes_SCALAR_TO_VECTOR(SDNode *N); @@ -859,8 +860,7 @@ void SplitVecRes_CONCAT_VECTORS(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_EXTRACT_SUBVECTOR(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_INSERT_SUBVECTOR(SDNode *N, SDValue &Lo, SDValue &Hi); - void SplitVecRes_FPOWI(SDNode *N, SDValue &Lo, SDValue &Hi); - void SplitVecRes_FCOPYSIGN(SDNode *N, SDValue &Lo, SDValue &Hi); + void SplitVecRes_FPOp_MultiType(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_IS_FPCLASS(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo, SDValue &Hi); @@ -905,7 +905,7 @@ SDValue SplitVecOp_CONCAT_VECTORS(SDNode *N); SDValue SplitVecOp_VSETCC(SDNode *N); SDValue SplitVecOp_FP_ROUND(SDNode *N); - SDValue SplitVecOp_FCOPYSIGN(SDNode *N); + SDValue SplitVecOp_FPOpDifferentTypes(SDNode *N); SDValue SplitVecOp_FP_TO_XINT_SAT(SDNode *N); //===--------------------------------------------------------------------===// @@ -981,7 +981,7 @@ SDValue WidenVecRes_FP_TO_XINT_SAT(SDNode *N); SDValue WidenVecRes_FCOPYSIGN(SDNode *N); SDValue WidenVecRes_IS_FPCLASS(SDNode *N); - SDValue WidenVecRes_POWI(SDNode *N); + SDValue WidenVecRes_ExpOp(SDNode *N); SDValue WidenVecRes_Unary(SDNode *N); SDValue WidenVecRes_InregOp(SDNode *N); @@ -1011,6 +1011,7 @@ SDValue WidenVecOp_VECREDUCE(SDNode *N); SDValue WidenVecOp_VECREDUCE_SEQ(SDNode *N); SDValue WidenVecOp_VP_REDUCE(SDNode *N); + SDValue WidenVecOp_ExpOp(SDNode *N); /// Helper function to generate a set of operations to perform /// a vector operation for a wider type. 
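The renamed vector-legalization hooks above now also fire for FLDEXP. For
instance, an odd-sized vector call such as the following (illustrative IR,
not from the patch) is legalized by widening both the value and the
exponent operand::

      define <3 x float> @scale_v3(<3 x float> %x, <3 x i32> %n) {
        %r = call <3 x float> @llvm.ldexp.v3f32.v3i32(<3 x float> %x, <3 x i32> %n)
        ret <3 x float> %r
      }

      declare <3 x float> @llvm.ldexp.v3f32.v3i32(<3 x float>, <3 x i32>)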
Index: llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp =================================================================== --- llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp +++ llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @@ -377,6 +377,7 @@ case ISD::FSQRT: case ISD::FSIN: case ISD::FCOS: + case ISD::FLDEXP: case ISD::FPOWI: case ISD::FPOW: case ISD::FLOG: Index: llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp =================================================================== --- llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -57,7 +57,7 @@ case ISD::BUILD_VECTOR: R = ScalarizeVecRes_BUILD_VECTOR(N); break; case ISD::EXTRACT_SUBVECTOR: R = ScalarizeVecRes_EXTRACT_SUBVECTOR(N); break; case ISD::FP_ROUND: R = ScalarizeVecRes_FP_ROUND(N); break; - case ISD::FPOWI: R = ScalarizeVecRes_FPOWI(N); break; + case ISD::FPOWI: R = ScalarizeVecRes_ExpOp(N); break; case ISD::INSERT_VECTOR_ELT: R = ScalarizeVecRes_INSERT_VECTOR_ELT(N); break; case ISD::LOAD: R = ScalarizeVecRes_LOAD(cast(N));break; case ISD::SCALAR_TO_VECTOR: R = ScalarizeVecRes_SCALAR_TO_VECTOR(N); break; @@ -126,6 +126,7 @@ case ISD::FMAXNUM_IEEE: case ISD::FMINIMUM: case ISD::FMAXIMUM: + case ISD::FLDEXP: case ISD::SMIN: case ISD::SMAX: case ISD::UMIN: @@ -348,10 +349,10 @@ N->getOperand(1)); } -SDValue DAGTypeLegalizer::ScalarizeVecRes_FPOWI(SDNode *N) { +SDValue DAGTypeLegalizer::ScalarizeVecRes_ExpOp(SDNode *N) { SDValue Op = GetScalarizedVector(N->getOperand(0)); - return DAG.getNode(ISD::FPOWI, SDLoc(N), - Op.getValueType(), Op, N->getOperand(1)); + return DAG.getNode(N->getOpcode(), SDLoc(N), Op.getValueType(), Op, + N->getOperand(1)); } SDValue DAGTypeLegalizer::ScalarizeVecRes_INSERT_VECTOR_ELT(SDNode *N) { @@ -960,8 +961,9 @@ case ISD::CONCAT_VECTORS: SplitVecRes_CONCAT_VECTORS(N, Lo, Hi); break; case ISD::EXTRACT_SUBVECTOR: SplitVecRes_EXTRACT_SUBVECTOR(N, Lo, Hi); break; case ISD::INSERT_SUBVECTOR: SplitVecRes_INSERT_SUBVECTOR(N, Lo, Hi); break; - case ISD::FPOWI: SplitVecRes_FPOWI(N, Lo, Hi); break; - case ISD::FCOPYSIGN: SplitVecRes_FCOPYSIGN(N, Lo, Hi); break; + case ISD::FPOWI: + case ISD::FLDEXP: + case ISD::FCOPYSIGN: SplitVecRes_FPOp_MultiType(N, Lo, Hi); break; case ISD::IS_FPCLASS: SplitVecRes_IS_FPCLASS(N, Lo, Hi); break; case ISD::INSERT_VECTOR_ELT: SplitVecRes_INSERT_VECTOR_ELT(N, Lo, Hi); break; case ISD::SPLAT_VECTOR: @@ -1463,16 +1465,11 @@ Hi = DAG.getLoad(Hi.getValueType(), dl, Store, StackPtr, MPI, SmallestAlign); } -void DAGTypeLegalizer::SplitVecRes_FPOWI(SDNode *N, SDValue &Lo, - SDValue &Hi) { - SDLoc dl(N); - GetSplitVector(N->getOperand(0), Lo, Hi); - Lo = DAG.getNode(ISD::FPOWI, dl, Lo.getValueType(), Lo, N->getOperand(1)); - Hi = DAG.getNode(ISD::FPOWI, dl, Hi.getValueType(), Hi, N->getOperand(1)); -} - -void DAGTypeLegalizer::SplitVecRes_FCOPYSIGN(SDNode *N, SDValue &Lo, - SDValue &Hi) { +// Handle splitting an FP where the second operand does not match the first +// type. 
The second operand may be a scalar, or a vector that has exactly as +// many elements as the first +void DAGTypeLegalizer::SplitVecRes_FPOp_MultiType(SDNode *N, SDValue &Lo, + SDValue &Hi) { SDValue LHSLo, LHSHi; GetSplitVector(N->getOperand(0), LHSLo, LHSHi); SDLoc DL(N); @@ -1480,14 +1477,18 @@ SDValue RHSLo, RHSHi; SDValue RHS = N->getOperand(1); EVT RHSVT = RHS.getValueType(); - if (getTypeAction(RHSVT) == TargetLowering::TypeSplitVector) - GetSplitVector(RHS, RHSLo, RHSHi); - else - std::tie(RHSLo, RHSHi) = DAG.SplitVector(RHS, SDLoc(RHS)); - + if (RHSVT.isVector()) { + if (getTypeAction(RHSVT) == TargetLowering::TypeSplitVector) + GetSplitVector(RHS, RHSLo, RHSHi); + else + std::tie(RHSLo, RHSHi) = DAG.SplitVector(RHS, SDLoc(RHS)); - Lo = DAG.getNode(ISD::FCOPYSIGN, DL, LHSLo.getValueType(), LHSLo, RHSLo); - Hi = DAG.getNode(ISD::FCOPYSIGN, DL, LHSHi.getValueType(), LHSHi, RHSHi); + Lo = DAG.getNode(N->getOpcode(), DL, LHSLo.getValueType(), LHSLo, RHSLo); + Hi = DAG.getNode(N->getOpcode(), DL, LHSHi.getValueType(), LHSHi, RHSHi); + } else { + Lo = DAG.getNode(N->getOpcode(), DL, LHSLo.getValueType(), LHSLo, RHS); + Hi = DAG.getNode(N->getOpcode(), DL, LHSHi.getValueType(), LHSHi, RHS); + } } void DAGTypeLegalizer::SplitVecRes_IS_FPCLASS(SDNode *N, SDValue &Lo, @@ -2846,7 +2847,7 @@ case ISD::STRICT_FP_ROUND: case ISD::VP_FP_ROUND: case ISD::FP_ROUND: Res = SplitVecOp_FP_ROUND(N); break; - case ISD::FCOPYSIGN: Res = SplitVecOp_FCOPYSIGN(N); break; + case ISD::FCOPYSIGN: Res = SplitVecOp_FPOpDifferentTypes(N); break; case ISD::STORE: Res = SplitVecOp_STORE(cast(N), OpNo); break; @@ -2900,6 +2901,9 @@ case ISD::FTRUNC: Res = SplitVecOp_UnaryOp(N); break; + case ISD::FLDEXP: + Res = SplitVecOp_FPOpDifferentTypes(N); + break; case ISD::ANY_EXTEND_VECTOR_INREG: case ISD::SIGN_EXTEND_VECTOR_INREG: @@ -3845,10 +3849,12 @@ return DAG.getNode(ISD::CONCAT_VECTORS, DL, ResVT, Lo, Hi); } -SDValue DAGTypeLegalizer::SplitVecOp_FCOPYSIGN(SDNode *N) { - // The result (and the first input) has a legal vector type, but the second - // input needs splitting. - +// Split a vector type in an FP binary operation where the second operand has a +// different type from the first. +// +// The result (and the first input) has a legal vector type, but the second +// input needs splitting. 
+SDValue DAGTypeLegalizer::SplitVecOp_FPOpDifferentTypes(SDNode *N) { SDLoc DL(N); EVT LHSLoVT, LHSHiVT; @@ -3864,8 +3870,8 @@ SDValue RHSLo, RHSHi; std::tie(RHSLo, RHSHi) = DAG.SplitVector(N->getOperand(1), DL); - SDValue Lo = DAG.getNode(ISD::FCOPYSIGN, DL, LHSLoVT, LHSLo, RHSLo); - SDValue Hi = DAG.getNode(ISD::FCOPYSIGN, DL, LHSHiVT, LHSHi, RHSHi); + SDValue Lo = DAG.getNode(N->getOpcode(), DL, LHSLoVT, LHSLo, RHSLo); + SDValue Hi = DAG.getNode(N->getOpcode(), DL, LHSHiVT, LHSHi, RHSHi); return DAG.getNode(ISD::CONCAT_VECTORS, DL, N->getValueType(0), Lo, Hi); } @@ -4075,8 +4081,9 @@ Res = WidenVecRes_IS_FPCLASS(N); break; + case ISD::FLDEXP: case ISD::FPOWI: - Res = WidenVecRes_POWI(N); + Res = WidenVecRes_ExpOp(N); break; case ISD::ANY_EXTEND_VECTOR_INREG: @@ -4433,10 +4440,18 @@ for (unsigned i = 1; i < NumOpers; ++i) { SDValue Oper = N->getOperand(i); - if (Oper.getValueType().isVector()) { - assert(Oper.getValueType() == N->getValueType(0) && - "Invalid operand type to widen!"); - Oper = GetWidenedVector(Oper); + EVT OpVT = Oper.getValueType(); + if (OpVT.isVector()) { + if (getTypeAction(OpVT) == TargetLowering::TypeWidenVector) + Oper = GetWidenedVector(Oper); + else { + EVT WideOpVT = + EVT::getVectorVT(*DAG.getContext(), OpVT.getVectorElementType(), + WidenVT.getVectorElementCount()); + Oper = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT, + DAG.getUNDEF(WideOpVT), Oper, + DAG.getVectorIdxConstant(0, dl)); + } } InOps.push_back(Oper); @@ -4454,9 +4469,14 @@ for (unsigned i = 0; i < NumOpers; ++i) { SDValue Op = InOps[i]; - if (Op.getValueType().isVector()) - Op = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, Op, + EVT OpVT = Op.getValueType(); + if (OpVT.isVector()) { + EVT OpExtractVT = + EVT::getVectorVT(*DAG.getContext(), OpVT.getVectorElementType(), + VT.getVectorElementCount()); + Op = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, OpExtractVT, Op, DAG.getVectorIdxConstant(Idx, dl)); + } EOps.push_back(Op); } @@ -4480,8 +4500,10 @@ for (unsigned i = 0; i < NumOpers; ++i) { SDValue Op = InOps[i]; - if (Op.getValueType().isVector()) - Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, WidenEltVT, Op, + EVT OpVT = Op.getValueType(); + if (OpVT.isVector()) + Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, + OpVT.getVectorElementType(), Op, DAG.getVectorIdxConstant(Idx, dl)); EOps.push_back(Op); @@ -4790,11 +4812,13 @@ N->getFlags()); } -SDValue DAGTypeLegalizer::WidenVecRes_POWI(SDNode *N) { +SDValue DAGTypeLegalizer::WidenVecRes_ExpOp(SDNode *N) { EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue InOp = GetWidenedVector(N->getOperand(0)); - SDValue ShOp = N->getOperand(1); - return DAG.getNode(N->getOpcode(), SDLoc(N), WidenVT, InOp, ShOp); + SDValue RHS = N->getOperand(1); + SDValue ExpOp = RHS.getValueType().isVector() ? 
GetWidenedVector(RHS) : RHS; + + return DAG.getNode(N->getOpcode(), SDLoc(N), WidenVT, InOp, ExpOp); } SDValue DAGTypeLegalizer::WidenVecRes_Unary(SDNode *N) { Index: llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp =================================================================== --- llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -4945,7 +4945,8 @@ case ISD::FROUND: case ISD::FROUNDEVEN: case ISD::FRINT: - case ISD::FNEARBYINT: { + case ISD::FNEARBYINT: + case ISD::FLDEXP: { if (SNaN) return true; return isKnownNeverNaN(Op.getOperand(0), SNaN, Depth + 1); Index: llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp =================================================================== --- llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -6424,6 +6424,12 @@ getValue(I.getArgOperand(0)), getValue(I.getArgOperand(1)), Flags)); return; + case Intrinsic::ldexp: + setValue(&I, DAG.getNode(ISD::FLDEXP, sdl, + getValue(I.getArgOperand(0)).getValueType(), + getValue(I.getArgOperand(0)), + getValue(I.getArgOperand(1)), Flags)); + return; case Intrinsic::arithmetic_fence: { setValue(&I, DAG.getNode(ISD::ARITH_FENCE, sdl, getValue(I.getArgOperand(0)).getValueType(), @@ -8521,6 +8527,12 @@ if (visitUnaryFloatCall(I, ISD::FEXP2)) return; break; + case LibFunc_ldexp: + case LibFunc_ldexpf: + case LibFunc_ldexpl: + if (visitBinaryFloatCall(I, ISD::FLDEXP)) + return; + break; case LibFunc_memcmp: if (visitMemCmpBCmpCall(I)) return; Index: llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp =================================================================== --- llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp +++ llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp @@ -283,6 +283,8 @@ case ISD::UMIN: return "umin"; case ISD::UMAX: return "umax"; + case ISD::FLDEXP: return "fldexp"; + case ISD::STRICT_FLDEXP: return "strict_fldexp"; case ISD::FPOWI: return "fpowi"; case ISD::STRICT_FPOWI: return "strict_fpowi"; case ISD::SETCC: return "setcc"; Index: llvm/lib/CodeGen/TargetLoweringBase.cpp =================================================================== --- llvm/lib/CodeGen/TargetLoweringBase.cpp +++ llvm/lib/CodeGen/TargetLoweringBase.cpp @@ -209,6 +209,13 @@ if (TT.isOSOpenBSD()) { setLibcallName(RTLIB::STACKPROTECTOR_CHECK_FAIL, nullptr); } + + if (TT.isOSWindows() && !TT.isOSCygMing()) { + setLibcallName(RTLIB::LDEXP_F32, nullptr); + setLibcallName(RTLIB::LDEXP_F80, nullptr); + setLibcallName(RTLIB::LDEXP_F128, nullptr); + setLibcallName(RTLIB::LDEXP_PPCF128, nullptr); + } } /// GetFPLibCall - Helper to return the right libcall for the given floating @@ -498,6 +505,11 @@ POWI_PPCF128); } +RTLIB::Libcall RTLIB::getLDEXP(EVT RetVT) { + return getFPLibCall(RetVT, LDEXP_F32, LDEXP_F64, LDEXP_F80, LDEXP_F128, + LDEXP_PPCF128); +} + RTLIB::Libcall RTLIB::getOUTLINE_ATOMIC(unsigned Opc, AtomicOrdering Order, MVT VT) { unsigned ModeN, ModelN; @@ -845,7 +857,8 @@ setOperationAction({ISD::BITREVERSE, ISD::PARITY}, VT, Expand); // These library functions default to expand. - setOperationAction({ISD::FROUND, ISD::FROUNDEVEN, ISD::FPOWI}, VT, Expand); + setOperationAction({ISD::FROUND, ISD::FROUNDEVEN, ISD::FPOWI, ISD::FLDEXP}, + VT, Expand); // These operations default to expand for vector types. 
if (VT.isVector()) Index: llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp +++ llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp @@ -168,6 +168,7 @@ case ISD::FFLOOR: case ISD::FMINNUM: case ISD::FMAXNUM: + case ISD::FLDEXP: case AMDGPUISD::FRACT: case AMDGPUISD::CLAMP: case AMDGPUISD::COS_HW: @@ -179,7 +180,6 @@ case AMDGPUISD::RCP: case AMDGPUISD::RSQ: case AMDGPUISD::RCP_IFLAG: - case AMDGPUISD::LDEXP: // On gfx10, all 16-bit instructions preserve the high bits. return Subtarget->getGeneration() <= AMDGPUSubtarget::GFX9; case ISD::FP_ROUND: Index: llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h +++ llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h @@ -438,7 +438,6 @@ RCP_IFLAG, FMUL_LEGACY, RSQ_CLAMP, - LDEXP, FP_CLASS, DOT4, CARRY, Index: llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -2612,7 +2612,7 @@ ShAmt); // On GCN, use LDEXP directly. if (Subtarget->isGCN()) - return DAG.getNode(AMDGPUISD::LDEXP, SL, MVT::f32, FVal, ShAmt); + return DAG.getNode(ISD::FLDEXP, SL, MVT::f32, FVal, ShAmt); // Otherwise, align 'ShAmt' to the exponent part and add it into the exponent // part directly to emulate the multiplication of 2^ShAmt. That 8-bit @@ -2645,7 +2645,7 @@ SDValue CvtLo = DAG.getNode(ISD::UINT_TO_FP, SL, MVT::f64, Lo); - SDValue LdExp = DAG.getNode(AMDGPUISD::LDEXP, SL, MVT::f64, CvtHi, + SDValue LdExp = DAG.getNode(ISD::FLDEXP, SL, MVT::f64, CvtHi, DAG.getConstant(32, SL, MVT::i32)); // TODO: Should this propagate fast-math-flags? return DAG.getNode(ISD::FADD, SL, MVT::f64, LdExp, CvtLo); @@ -4636,7 +4636,6 @@ NODE_NAME_CASE(RCP_IFLAG) NODE_NAME_CASE(FMUL_LEGACY) NODE_NAME_CASE(RSQ_CLAMP) - NODE_NAME_CASE(LDEXP) NODE_NAME_CASE(FP_CLASS) NODE_NAME_CASE(DOT4) NODE_NAME_CASE(CARRY) @@ -5043,7 +5042,7 @@ // TODO: Need is known positive check. return false; } - case AMDGPUISD::LDEXP: + case ISD::FLDEXP: case AMDGPUISD::FRACT: { if (SNaN) return true; Index: llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td +++ llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td @@ -18,10 +18,6 @@ SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisInt<0>, SDTCisInt<3> ]>; -def AMDGPULdExpOp : SDTypeProfile<1, 2, - [SDTCisSameAs<0, 1>, SDTCisFP<0>, SDTCisInt<2>] ->; - def AMDGPUFPClassOp : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisFP<1>, SDTCisInt<2>] >; @@ -128,8 +124,6 @@ // out = 1.0 / sqrt(a) result clamped to +/- max_float. 
def AMDGPUrsq_clamp_impl : SDNode<"AMDGPUISD::RSQ_CLAMP", SDTFPUnaryOp>; -def AMDGPUldexp_impl : SDNode<"AMDGPUISD::LDEXP", AMDGPULdExpOp>; - def AMDGPUpkrtz_f16_f32_impl : SDNode<"AMDGPUISD::CVT_PKRTZ_F16_F32", AMDGPUFPPackOp>; def AMDGPUpknorm_i16_f32_impl : SDNode<"AMDGPUISD::CVT_PKNORM_I16_F32", AMDGPUFPPackOp>; def AMDGPUpknorm_u16_f32_impl : SDNode<"AMDGPUISD::CVT_PKNORM_U16_F32", AMDGPUFPPackOp>; @@ -389,10 +383,6 @@ def AMDGPUfract : PatFrags<(ops node:$src), [(int_amdgcn_fract node:$src), (AMDGPUfract_impl node:$src)]>; -def AMDGPUldexp : PatFrags<(ops node:$src0, node:$src1), - [(int_amdgcn_ldexp node:$src0, node:$src1), - (AMDGPUldexp_impl node:$src0, node:$src1)]>; - def AMDGPUfp_class : PatFrags<(ops node:$src0, node:$src1), [(int_amdgcn_class node:$src0, node:$src1), (AMDGPUfp_class_impl node:$src0, node:$src1)]>; Index: llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -798,6 +798,13 @@ .legalFor({S32, S64, S16}) .scalarize(0) .clampScalar(0, S16, S64); + + getActionDefinitionsBuilder({G_FLDEXP, G_STRICT_FLDEXP}) + .legalFor({{S32, S32}, {S64, S32}, {S16, S16}}) + .scalarize(0) + .maxScalarIf(typeIs(0, S16), 1, S16) + .clampScalar(1, S32, S32) + .lower(); } else { getActionDefinitionsBuilder(G_FSQRT) .legalFor({S32, S64}) @@ -816,6 +823,13 @@ .scalarize(0) .clampScalar(0, S32, S64); } + + getActionDefinitionsBuilder({G_FLDEXP, G_STRICT_FLDEXP}) + .legalFor({{S32, S32}, {S64, S32}}) + .scalarize(0) + .clampScalar(0, S32, S64) + .clampScalar(1, S32, S32) + .lower(); } getActionDefinitionsBuilder(G_FPTRUNC) @@ -2237,9 +2251,7 @@ : B.buildUITOFP(S64, Unmerge.getReg(1)); auto CvtLo = B.buildUITOFP(S64, Unmerge.getReg(0)); - auto LdExp = B.buildIntrinsic(Intrinsic::amdgcn_ldexp, {S64}, false) - .addUse(CvtHi.getReg(0)) - .addUse(ThirtyTwo.getReg(0)); + auto LdExp = B.buildFLdexp(S64, CvtHi, ThirtyTwo); // TODO: Should this propagate fast-math-flags? B.buildFAdd(Dst, LdExp, CvtLo); @@ -2270,10 +2282,7 @@ auto Norm2 = B.buildOr(S32, Unmerge2.getReg(1), Adjust); auto FVal = Signed ? B.buildSITOFP(S32, Norm2) : B.buildUITOFP(S32, Norm2); auto Scale = B.buildSub(S32, ThirtyTwo, ShAmt); - B.buildIntrinsic(Intrinsic::amdgcn_ldexp, ArrayRef{Dst}, - /*HasSideEffects=*/false) - .addUse(FVal.getReg(0)) - .addUse(Scale.getReg(0)); + B.buildFLdexp(Dst, FVal, Scale); MI.eraseFromParent(); return true; } Index: llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp +++ llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp @@ -3741,6 +3741,7 @@ case AMDGPU::G_FPEXT: case AMDGPU::G_FEXP2: case AMDGPU::G_FLOG2: + case AMDGPU::G_FLDEXP: case AMDGPU::G_FMINNUM: case AMDGPU::G_FMAXNUM: case AMDGPU::G_FMINNUM_IEEE: @@ -3751,6 +3752,7 @@ case AMDGPU::G_STRICT_FSUB: case AMDGPU::G_STRICT_FMUL: case AMDGPU::G_STRICT_FMA: + case AMDGPU::G_STRICT_FLDEXP: case AMDGPU::G_BSWAP: // TODO: Somehow expand for scalar? 
case AMDGPU::G_FSHR: // TODO: Expand for scalar case AMDGPU::G_AMDGPU_FMIN_LEGACY: @@ -4214,7 +4216,6 @@ case Intrinsic::amdgcn_rsq_clamp: case Intrinsic::amdgcn_fmul_legacy: case Intrinsic::amdgcn_fma_legacy: - case Intrinsic::amdgcn_ldexp: case Intrinsic::amdgcn_frexp_mant: case Intrinsic::amdgcn_frexp_exp: case Intrinsic::amdgcn_fract: Index: llvm/lib/Target/AMDGPU/SIISelLowering.h =================================================================== --- llvm/lib/Target/AMDGPU/SIISelLowering.h +++ llvm/lib/Target/AMDGPU/SIISelLowering.h @@ -141,6 +141,7 @@ /// Custom lowering for ISD::FP_ROUND for MVT::f16. SDValue lowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const; SDValue lowerFMINNUM_FMAXNUM(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerFLDEXP(SDValue Op, SelectionDAG &DAG) const; SDValue lowerXMULO(SDValue Op, SelectionDAG &DAG) const; SDValue lowerXMUL_LOHI(SDValue Op, SelectionDAG &DAG) const; Index: llvm/lib/Target/AMDGPU/SIISelLowering.cpp =================================================================== --- llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -470,6 +470,8 @@ MVT::f64, Custom); setOperationAction(ISD::FFLOOR, MVT::f64, Legal); + setOperationAction({ISD::FLDEXP, ISD::STRICT_FLDEXP}, {MVT::f32, MVT::f64}, + Legal); setOperationAction({ISD::FSIN, ISD::FCOS, ISD::FDIV}, MVT::f32, Custom); setOperationAction(ISD::FDIV, MVT::f64, Custom); @@ -526,7 +528,7 @@ // F16 - VOP2 Actions. setOperationAction({ISD::BR_CC, ISD::SELECT_CC}, MVT::f16, Expand); - + setOperationAction({ISD::FLDEXP, ISD::STRICT_FLDEXP}, MVT::f16, Custom); setOperationAction(ISD::FDIV, MVT::f16, Custom); // F16 - VOP3 Actions. @@ -4828,6 +4830,9 @@ case ISD::FMINNUM: case ISD::FMAXNUM: return lowerFMINNUM_FMAXNUM(Op, DAG); + case ISD::FLDEXP: + case ISD::STRICT_FLDEXP: + return lowerFLDEXP(Op, DAG); case ISD::FMA: return splitTernaryVectorOp(Op, DAG); case ISD::FP_TO_SINT: @@ -5446,6 +5451,40 @@ return Op; } +SDValue SITargetLowering::lowerFLDEXP(SDValue Op, SelectionDAG &DAG) const { + bool IsStrict = Op.getOpcode() == ISD::STRICT_FLDEXP; + EVT VT = Op.getValueType(); + assert(VT == MVT::f16); + + SDValue Exp = Op.getOperand(IsStrict ? 2 : 1); + EVT ExpVT = Exp.getValueType(); + if (ExpVT == MVT::i16) + return Op; + + SDLoc DL(Op); + + // Correct the exponent type for f16 to i16. + // Clamp the range of the exponent to the instruction's range. + + // TODO: This should be a generic narrowing legalization, and can easily be + // for GlobalISel. 
+ + SDValue MinExp = DAG.getConstant(minIntN(16), DL, ExpVT); + SDValue ClampMin = DAG.getNode(ISD::SMAX, DL, ExpVT, Exp, MinExp); + + SDValue MaxExp = DAG.getConstant(maxIntN(16), DL, ExpVT); + SDValue Clamp = DAG.getNode(ISD::SMIN, DL, ExpVT, ClampMin, MaxExp); + + SDValue TruncExp = DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, Clamp); + + if (IsStrict) { + return DAG.getNode(ISD::STRICT_FLDEXP, DL, {VT, MVT::Other}, + {Op.getOperand(0), Op.getOperand(1), TruncExp}); + } + + return DAG.getNode(ISD::FLDEXP, DL, VT, Op.getOperand(0), TruncExp); +} + SDValue SITargetLowering::lowerXMULO(SDValue Op, SelectionDAG &DAG) const { EVT VT = Op.getValueType(); SDLoc SL(Op); @@ -7133,8 +7172,7 @@ return emitRemovedIntrinsicError(DAG, DL, VT); } case Intrinsic::amdgcn_ldexp: - return DAG.getNode(AMDGPUISD::LDEXP, DL, VT, - Op.getOperand(1), Op.getOperand(2)); + return DAG.getNode(ISD::FLDEXP, DL, VT, Op.getOperand(1), Op.getOperand(2)); case Intrinsic::amdgcn_fract: return DAG.getNode(AMDGPUISD::FRACT, DL, VT, Op.getOperand(1)); @@ -10281,6 +10319,7 @@ case ISD::FREM: case ISD::FP_ROUND: case ISD::FP_EXTEND: + case ISD::FLDEXP: case AMDGPUISD::FMUL_LEGACY: case AMDGPUISD::FMAD_FTZ: case AMDGPUISD::RCP: @@ -10292,7 +10331,6 @@ case AMDGPUISD::DIV_FMAS: case AMDGPUISD::DIV_FIXUP: case AMDGPUISD::FRACT: - case AMDGPUISD::LDEXP: case AMDGPUISD::CVT_PKRTZ_F16_F32: case AMDGPUISD::CVT_F32_UBYTE0: case AMDGPUISD::CVT_F32_UBYTE1: @@ -11878,12 +11916,12 @@ return performFCanonicalizeCombine(N, DCI); case AMDGPUISD::RCP: return performRcpCombine(N, DCI); + case ISD::FLDEXP: case AMDGPUISD::FRACT: case AMDGPUISD::RSQ: case AMDGPUISD::RCP_LEGACY: case AMDGPUISD::RCP_IFLAG: - case AMDGPUISD::RSQ_CLAMP: - case AMDGPUISD::LDEXP: { + case AMDGPUISD::RSQ_CLAMP: { // FIXME: This is probably wrong. 
If src is an sNaN, it won't be quieted SDValue Src = N->getOperand(0); if (Src.isUndef()) Index: llvm/lib/Target/AMDGPU/VOP2Instructions.td =================================================================== --- llvm/lib/Target/AMDGPU/VOP2Instructions.td +++ llvm/lib/Target/AMDGPU/VOP2Instructions.td @@ -777,7 +777,7 @@ defm V_MBCNT_LO_U32_B32 : VOP2Inst <"v_mbcnt_lo_u32_b32", VOP_I32_I32_I32, int_amdgcn_mbcnt_lo>; defm V_MBCNT_HI_U32_B32 : VOP2Inst <"v_mbcnt_hi_u32_b32", VOP_I32_I32_I32, int_amdgcn_mbcnt_hi>; } // End IsNeverUniform = 1 -defm V_LDEXP_F32 : VOP2Inst <"v_ldexp_f32", VOP_F32_F32_I32, AMDGPUldexp>; +defm V_LDEXP_F32 : VOP2Inst <"v_ldexp_f32", VOP_F32_F32_I32, any_fldexp>; let ReadsModeReg = 0, mayRaiseFPException = 0 in { defm V_CVT_PKNORM_I16_F32 : VOP2Inst <"v_cvt_pknorm_i16_f32", VOP_V2I16_F32_F32, AMDGPUpknorm_i16_f32>; @@ -863,7 +863,7 @@ // 16-Bit Operand Instructions //===----------------------------------------------------------------------===// -def LDEXP_F16_VOPProfile_True16 : VOPProfile_True16 { +def LDEXP_F16_VOPProfile_True16 : VOPProfile_True16 { // The ldexp.f16 intrinsic expects a i32 src1 operand, though the hardware // encoding treats src1 as an f16 let Src1RC32 = RegisterOperand; @@ -874,9 +874,9 @@ let isReMaterializable = 1 in { let FPDPRounding = 1 in { let SubtargetPredicate = NotHasTrue16BitInsts, OtherPredicates = [Has16BitInsts] in - defm V_LDEXP_F16 : VOP2Inst <"v_ldexp_f16", VOP_F16_F16_I32, AMDGPUldexp>; + defm V_LDEXP_F16 : VOP2Inst <"v_ldexp_f16", VOP_F16_F16_I16, any_fldexp>; let SubtargetPredicate = HasTrue16BitInsts in - defm V_LDEXP_F16_t16 : VOP2Inst <"v_ldexp_f16_t16", LDEXP_F16_VOPProfile_True16, AMDGPUldexp>; + defm V_LDEXP_F16_t16 : VOP2Inst <"v_ldexp_f16_t16", LDEXP_F16_VOPProfile_True16, any_fldexp>; } // End FPDPRounding = 1 // FIXME VOP3 Only instructions. 
NFC using VOPProfile_True16 for these until a planned change to use a new register class for VOP3 encoded True16 instructions defm V_LSHLREV_B16 : VOP2Inst_e64_t16 <"v_lshlrev_b16", VOP_I16_I16_I16, clshl_rev_16>; Index: llvm/lib/Target/AMDGPU/VOP3Instructions.td =================================================================== --- llvm/lib/Target/AMDGPU/VOP3Instructions.td +++ llvm/lib/Target/AMDGPU/VOP3Instructions.td @@ -219,7 +219,7 @@ let SchedRW = [WriteDoubleAdd], FPDPRounding = 1 in { defm V_DIV_FIXUP_F64 : VOP3Inst <"v_div_fixup_f64", VOP3_Profile, AMDGPUdiv_fixup>; - defm V_LDEXP_F64 : VOP3Inst <"v_ldexp_f64", VOP3_Profile, AMDGPUldexp>; + defm V_LDEXP_F64 : VOP3Inst <"v_ldexp_f64", VOP3_Profile, any_fldexp>; } // End SchedRW = [WriteDoubleAdd], FPDPRounding = 1 } // End isReMaterializable = 1 Index: llvm/lib/Target/Hexagon/HexagonISelLowering.cpp =================================================================== --- llvm/lib/Target/Hexagon/HexagonISelLowering.cpp +++ llvm/lib/Target/Hexagon/HexagonISelLowering.cpp @@ -1635,7 +1635,7 @@ ISD::FCOS, ISD::FPOW, ISD::FLOG, ISD::FLOG2, ISD::FLOG10, ISD::FEXP, ISD::FEXP2, ISD::FCEIL, ISD::FTRUNC, ISD::FRINT, ISD::FNEARBYINT, ISD::FROUND, ISD::FFLOOR, - ISD::FMINNUM, ISD::FMAXNUM, ISD::FSINCOS, + ISD::FMINNUM, ISD::FMAXNUM, ISD::FSINCOS, ISD::FLDEXP, // Misc: ISD::BR_CC, ISD::SELECT_CC, ISD::ConstantPool, // Vector: Index: llvm/lib/Target/PowerPC/PPCISelLowering.cpp =================================================================== --- llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -849,6 +849,7 @@ setOperationAction(ISD::FCEIL, VT, Expand); setOperationAction(ISD::FTRUNC, VT, Expand); setOperationAction(ISD::FRINT, VT, Expand); + setOperationAction(ISD::FLDEXP, VT, Expand); setOperationAction(ISD::FNEARBYINT, VT, Expand); setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Expand); setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Expand); Index: llvm/lib/Target/X86/X86ISelLowering.cpp =================================================================== --- llvm/lib/Target/X86/X86ISelLowering.cpp +++ llvm/lib/Target/X86/X86ISelLowering.cpp @@ -619,6 +619,7 @@ setOperationAction(ISD::FROUND, VT, Action); setOperationAction(ISD::FROUNDEVEN, VT, Action); setOperationAction(ISD::FTRUNC, VT, Action); + setOperationAction(ISD::FLDEXP, VT, Action); }; if (!Subtarget.useSoftFloat() && Subtarget.hasSSE2()) { @@ -678,6 +679,7 @@ setOperationAction(ISD::STRICT_FMAXIMUM, MVT::f16, Promote); setOperationAction(ISD::STRICT_FSQRT, MVT::f16, Promote); setOperationAction(ISD::STRICT_FPOW, MVT::f16, Promote); + setOperationAction(ISD::STRICT_FLDEXP, MVT::f16, Promote); setOperationAction(ISD::STRICT_FLOG, MVT::f16, Promote); setOperationAction(ISD::STRICT_FLOG2, MVT::f16, Promote); setOperationAction(ISD::STRICT_FLOG10, MVT::f16, Promote); Index: llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir =================================================================== --- llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir +++ llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir @@ -470,6 +470,9 @@ # DEBUG-NEXT: .. opcode {{[0-9]+}} is aliased to {{[0-9]+}} # DEBUG-NEXT: .. the first uncovered type index: 1, OK # DEBUG-NEXT: .. the first uncovered imm index: 0, OK +# DEBUG-NEXT: G_FLDEXP (opcode {{[0-9]+}}): 2 type indices, 0 imm indices +# DEBUG-NEXT:.. type index coverage check SKIPPED: no rules defined +# DEBUG-NEXT:..
imm index coverage check SKIPPED: no rules defined # DEBUG-NEXT: G_FNEG (opcode {{[0-9]+}}): 1 type index, 0 imm indices # DEBUG-NEXT: .. opcode {{[0-9]+}} is aliased to {{[0-9]+}} # DEBUG-NEXT: .. the first uncovered type index: 1, OK @@ -653,6 +656,9 @@ # DEBUG-NEXT: G_STRICT_FSQRT (opcode {{[0-9]+}}): 1 type index, 0 imm indices # DEBUG-NEXT: .. type index coverage check SKIPPED: no rules defined # DEBUG-NEXT: .. imm index coverage check SKIPPED: no rules defined +# DEBUG-NEXT: G_STRICT_FLDEXP (opcode {{[0-9]+}}): 2 type indices, 0 imm indices +# DEBUG-NEXT:.. type index coverage check SKIPPED: no rules defined +# DEBUG-NEXT:.. imm index coverage check SKIPPED: no rules defined # DEBUG-NEXT: G_READ_REGISTER (opcode {{[0-9]+}}): 1 type index, 0 imm indices # DEBUG-NEXT: .. type index coverage check SKIPPED: no rules defined # DEBUG-NEXT: .. imm index coverage check SKIPPED: no rules defined Index: llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.ldexp.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.ldexp.mir +++ /dev/null @@ -1,134 +0,0 @@ -# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -march=amdgcn -mcpu=tahiti -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=GCN %s - ---- -name: ldexp_s32_vsv -legalized: true -regBankSelected: true -tracksRegLiveness: true - -body: | - bb.0: - liveins: $sgpr0, $vgpr0 - ; GCN-LABEL: name: ldexp_s32_vsv - ; GCN: liveins: $sgpr0, $vgpr0 - ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: %2:vgpr_32 = nofpexcept V_LDEXP_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: S_ENDPGM 0, implicit %2 - %0:sgpr(s32) = COPY $sgpr0 - %1:vgpr(s32) = COPY $vgpr0 - %2:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.ldexp), %0, %1 - S_ENDPGM 0, implicit %2 -... - ---- -name: ldexp_s32_vvs -legalized: true -regBankSelected: true -tracksRegLiveness: true - -body: | - bb.0: - liveins: $sgpr0, $vgpr0 - ; GCN-LABEL: name: ldexp_s32_vvs - ; GCN: liveins: $sgpr0, $vgpr0 - ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: %2:vgpr_32 = nofpexcept V_LDEXP_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: S_ENDPGM 0, implicit %2 - %0:vgpr(s32) = COPY $vgpr0 - %1:sgpr(s32) = COPY $sgpr0 - %2:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.ldexp), %0, %1 - S_ENDPGM 0, implicit %2 -... - ---- -name: ldexp_s32_vvv -legalized: true -regBankSelected: true -tracksRegLiveness: true - -body: | - bb.0: - liveins: $vgpr0, $vgpr1 - ; GCN-LABEL: name: ldexp_s32_vvv - ; GCN: liveins: $vgpr0, $vgpr1 - ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GCN-NEXT: %2:vgpr_32 = nofpexcept V_LDEXP_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: S_ENDPGM 0, implicit %2 - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.ldexp), %0, %1 - S_ENDPGM 0, implicit %2 -... 
- ---- -name: ldexp_s64_vsv -legalized: true -regBankSelected: true -tracksRegLiveness: true - -body: | - bb.0: - liveins: $sgpr0_sgpr1, $vgpr0 - ; GCN-LABEL: name: ldexp_s64_vsv - ; GCN: liveins: $sgpr0_sgpr1, $vgpr0 - ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: %2:vreg_64 = nofpexcept V_LDEXP_F64_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: S_ENDPGM 0, implicit %2 - %0:sgpr(s64) = COPY $sgpr0_sgpr1 - %1:vgpr(s32) = COPY $vgpr0 - %2:vgpr(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ldexp), %0, %1 - S_ENDPGM 0, implicit %2 -... - ---- -name: ldexp_s64_vvs -legalized: true -regBankSelected: true -tracksRegLiveness: true - -body: | - bb.0: - liveins: $sgpr0_sgpr1, $vgpr0 - ; GCN-LABEL: name: ldexp_s64_vvs - ; GCN: liveins: $sgpr0_sgpr1, $vgpr0 - ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: %2:vreg_64 = nofpexcept V_LDEXP_F64_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: S_ENDPGM 0, implicit %2 - %0:vgpr(s64) = COPY $vgpr0_vgpr1 - %1:sgpr(s32) = COPY $sgpr0 - %2:vgpr(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ldexp), %0, %1 - S_ENDPGM 0, implicit %2 -... - ---- -name: ldexp_s64_vvv -legalized: true -regBankSelected: true -tracksRegLiveness: true - -body: | - bb.0: - liveins: $vgpr0_vgpr1, $vgpr2 - ; GCN-LABEL: name: ldexp_s64_vvv - ; GCN: liveins: $vgpr0_vgpr1, $vgpr2 - ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GCN-NEXT: %2:vreg_64 = nofpexcept V_LDEXP_F64_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: S_ENDPGM 0, implicit %2 - %0:vgpr(s64) = COPY $vgpr0_vgpr1 - %1:vgpr(s32) = COPY $vgpr2 - %2:vgpr(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ldexp), %0, %1 - S_ENDPGM 0, implicit %2 -... 
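The selection tests deleted here and in the next file exercised the target intrinsic directly; equivalent coverage for the generic form is added by llvm/test/CodeGen/AMDGPU/llvm.ldexp.ll later in this patch, which still selects to the same v_ldexp instructions. A minimal IR sketch of the migration, assuming a float value and an i32 exponent (the old mangling is the one checked in the updated clang tests)::

    ; before: target-specific intrinsic
    %r = call float @llvm.amdgcn.ldexp.f32(float %a, i32 %b)
    ; after: generic intrinsic
    %r = call float @llvm.ldexp.f32.i32(float %a, i32 %b)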
Index: llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.ldexp.s16.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.ldexp.s16.mir +++ /dev/null @@ -1,73 +0,0 @@ -# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=GCN %s -# RUN: llc -march=amdgcn -mcpu=tahiti -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=2 -pass-remarks-missed='gisel*' %s -o /dev/null 2>&1 | FileCheck -check-prefix=SI-ERR %s - -# SI-ERR: remark: :0:0: cannot select: %3:vgpr(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.ldexp), %2:sgpr(s16), %1:vgpr(s32) (in function: ldexp_s16_vsv) -# SI-ERR-NEXT: remark: :0:0: cannot select: %3:vgpr(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.ldexp), %2:vgpr(s16), %1:sgpr(s32) (in function: ldexp_s16_vvs) -# SI-ERR-NEXT: remark: :0:0: cannot select: %3:vgpr(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.ldexp), %2:vgpr(s16), %1:vgpr(s32) (in function: ldexp_s16_vvv) - ---- -name: ldexp_s16_vsv -legalized: true -regBankSelected: true -tracksRegLiveness: true - -body: | - bb.0: - liveins: $sgpr0, $vgpr0 - ; GCN-LABEL: name: ldexp_s16_vsv - ; GCN: liveins: $sgpr0, $vgpr0 - ; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN: %3:vgpr_32 = nofpexcept V_LDEXP_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec - ; GCN: S_ENDPGM 0, implicit %3 - %0:sgpr(s32) = COPY $sgpr0 - %1:vgpr(s32) = COPY $vgpr0 - %2:sgpr(s16) = G_TRUNC %0 - %3:vgpr(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.ldexp), %2, %1 - S_ENDPGM 0, implicit %3 -... - ---- -name: ldexp_s16_vvs -legalized: true -regBankSelected: true -tracksRegLiveness: true - -body: | - bb.0: - liveins: $sgpr0, $vgpr0 - ; GCN-LABEL: name: ldexp_s16_vvs - ; GCN: liveins: $sgpr0, $vgpr0 - ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN: %3:vgpr_32 = nofpexcept V_LDEXP_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec - ; GCN: S_ENDPGM 0, implicit %3 - %0:vgpr(s32) = COPY $vgpr0 - %1:sgpr(s32) = COPY $sgpr0 - %2:vgpr(s16) = G_TRUNC %0 - %3:vgpr(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.ldexp), %2, %1 - S_ENDPGM 0, implicit %3 -... - ---- -name: ldexp_s16_vvv -legalized: true -regBankSelected: true -tracksRegLiveness: true - -body: | - bb.0: - liveins: $vgpr0, $vgpr1 - ; GCN-LABEL: name: ldexp_s16_vvv - ; GCN: liveins: $vgpr0, $vgpr1 - ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GCN: %3:vgpr_32 = nofpexcept V_LDEXP_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec - ; GCN: S_ENDPGM 0, implicit %3 - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vgpr(s16) = G_TRUNC %0 - %3:vgpr(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.ldexp), %2, %1 - S_ENDPGM 0, implicit %3 -... 
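In the legalizer tests updated below, the wide integer-to-FP expansion now ends in a generic G_FLDEXP rather than a G_INTRINSIC of @llvm.amdgcn.ldexp. A rough IR-level sketch of that final scaling step, where %norm and %shift stand for the normalized value and the exponent adjustment computed earlier (names are illustrative, not from the tests)::

    %f = sitofp i32 %norm to float
    %r = call float @llvm.ldexp.f32.i32(float %f, i32 %shift)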
Index: llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sitofp.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sitofp.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sitofp.mir @@ -132,8 +132,8 @@ ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[UV3]], [[UMIN1]] ; GFX6-NEXT: [[SITOFP:%[0-9]+]]:_(s32) = G_SITOFP [[OR]](s32) ; GFX6-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C]], [[UMIN]] - ; GFX6-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.ldexp), [[SITOFP]](s32), [[SUB1]](s32) - ; GFX6-NEXT: $vgpr0 = COPY [[INT1]](s32) + ; GFX6-NEXT: [[FLDEXP:%[0-9]+]]:_(s32) = G_FLDEXP [[SITOFP]], [[SUB1]](s32) + ; GFX6-NEXT: $vgpr0 = COPY [[FLDEXP]](s32) ; GFX8-LABEL: name: test_sitofp_s64_to_s32 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} @@ -154,8 +154,8 @@ ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[UV3]], [[UMIN1]] ; GFX8-NEXT: [[SITOFP:%[0-9]+]]:_(s32) = G_SITOFP [[OR]](s32) ; GFX8-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C]], [[UMIN]] - ; GFX8-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.ldexp), [[SITOFP]](s32), [[SUB1]](s32) - ; GFX8-NEXT: $vgpr0 = COPY [[INT1]](s32) + ; GFX8-NEXT: [[FLDEXP:%[0-9]+]]:_(s32) = G_FLDEXP [[SITOFP]], [[SUB1]](s32) + ; GFX8-NEXT: $vgpr0 = COPY [[FLDEXP]](s32) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s32) = G_SITOFP %0 $vgpr0 = COPY %1 @@ -175,8 +175,8 @@ ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 ; GFX6-NEXT: [[SITOFP:%[0-9]+]]:_(s64) = G_SITOFP [[UV1]](s32) ; GFX6-NEXT: [[UITOFP:%[0-9]+]]:_(s64) = G_UITOFP [[UV]](s32) - ; GFX6-NEXT: [[INT:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ldexp), [[SITOFP]](s64), [[C]](s32) - ; GFX6-NEXT: [[FADD:%[0-9]+]]:_(s64) = G_FADD [[INT]], [[UITOFP]] + ; GFX6-NEXT: [[FLDEXP:%[0-9]+]]:_(s64) = G_FLDEXP [[SITOFP]], [[C]](s32) + ; GFX6-NEXT: [[FADD:%[0-9]+]]:_(s64) = G_FADD [[FLDEXP]], [[UITOFP]] ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[FADD]](s64) ; GFX8-LABEL: name: test_sitofp_s64_to_s64 ; GFX8: liveins: $vgpr0_vgpr1 @@ -186,8 +186,8 @@ ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 ; GFX8-NEXT: [[SITOFP:%[0-9]+]]:_(s64) = G_SITOFP [[UV1]](s32) ; GFX8-NEXT: [[UITOFP:%[0-9]+]]:_(s64) = G_UITOFP [[UV]](s32) - ; GFX8-NEXT: [[INT:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ldexp), [[SITOFP]](s64), [[C]](s32) - ; GFX8-NEXT: [[FADD:%[0-9]+]]:_(s64) = G_FADD [[INT]], [[UITOFP]] + ; GFX8-NEXT: [[FLDEXP:%[0-9]+]]:_(s64) = G_FLDEXP [[SITOFP]], [[C]](s32) + ; GFX8-NEXT: [[FADD:%[0-9]+]]:_(s64) = G_FADD [[FLDEXP]], [[UITOFP]] ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[FADD]](s64) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s64) = G_SITOFP %0 @@ -476,8 +476,8 @@ ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[UV3]], [[UMIN1]] ; GFX6-NEXT: [[SITOFP:%[0-9]+]]:_(s32) = G_SITOFP [[OR]](s32) ; GFX6-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C]], [[UMIN]] - ; GFX6-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.ldexp), [[SITOFP]](s32), [[SUB1]](s32) - ; GFX6-NEXT: $vgpr0 = COPY [[INT1]](s32) + ; GFX6-NEXT: [[FLDEXP:%[0-9]+]]:_(s32) = G_FLDEXP [[SITOFP]], [[SUB1]](s32) + ; GFX6-NEXT: $vgpr0 = COPY [[FLDEXP]](s32) ; GFX8-LABEL: name: test_sitofp_s33_to_s32 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} @@ -499,8 +499,8 @@ ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[UV3]], [[UMIN1]] ; GFX8-NEXT: [[SITOFP:%[0-9]+]]:_(s32) = G_SITOFP [[OR]](s32) ; GFX8-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C]], [[UMIN]] - ; GFX8-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.ldexp), [[SITOFP]](s32), [[SUB1]](s32) - ; 
GFX8-NEXT: $vgpr0 = COPY [[INT1]](s32) + ; GFX8-NEXT: [[FLDEXP:%[0-9]+]]:_(s32) = G_FLDEXP [[SITOFP]], [[SUB1]](s32) + ; GFX8-NEXT: $vgpr0 = COPY [[FLDEXP]](s32) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s33) = G_TRUNC %0 %2:_(s32) = G_SITOFP %1 @@ -533,8 +533,8 @@ ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[UV3]], [[UMIN1]] ; GFX6-NEXT: [[SITOFP:%[0-9]+]]:_(s32) = G_SITOFP [[OR]](s32) ; GFX6-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C]], [[UMIN]] - ; GFX6-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.ldexp), [[SITOFP]](s32), [[SUB1]](s32) - ; GFX6-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT1]](s32) + ; GFX6-NEXT: [[FLDEXP:%[0-9]+]]:_(s32) = G_FLDEXP [[SITOFP]], [[SUB1]](s32) + ; GFX6-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FLDEXP]](s32) ; GFX6-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16) ; GFX6-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX8-LABEL: name: test_sitofp_s64_to_s16 @@ -557,8 +557,8 @@ ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[UV3]], [[UMIN1]] ; GFX8-NEXT: [[SITOFP:%[0-9]+]]:_(s32) = G_SITOFP [[OR]](s32) ; GFX8-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C]], [[UMIN]] - ; GFX8-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.ldexp), [[SITOFP]](s32), [[SUB1]](s32) - ; GFX8-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT1]](s32) + ; GFX8-NEXT: [[FLDEXP:%[0-9]+]]:_(s32) = G_FLDEXP [[SITOFP]], [[SUB1]](s32) + ; GFX8-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FLDEXP]](s32) ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16) ; GFX8-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) %0:_(s64) = COPY $vgpr0_vgpr1 @@ -594,14 +594,14 @@ ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[UV5]], [[UMIN1]] ; GFX6-NEXT: [[SITOFP:%[0-9]+]]:_(s32) = G_SITOFP [[OR]](s32) ; GFX6-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C]], [[UMIN]] - ; GFX6-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.ldexp), [[SITOFP]](s32), [[SUB1]](s32) - ; GFX6-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT1]](s32) + ; GFX6-NEXT: [[FLDEXP:%[0-9]+]]:_(s32) = G_FLDEXP [[SITOFP]], [[SUB1]](s32) + ; GFX6-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FLDEXP]](s32) ; GFX6-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) ; GFX6-NEXT: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[UV6]], [[UV7]] ; GFX6-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[XOR1]], [[C2]](s32) ; GFX6-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[C]], [[ASHR1]] - ; GFX6-NEXT: [[INT2:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.sffbh), [[UV7]](s32) - ; GFX6-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[INT2]], [[C1]] + ; GFX6-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.sffbh), [[UV7]](s32) + ; GFX6-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[INT1]], [[C1]] ; GFX6-NEXT: [[UMIN2:%[0-9]+]]:_(s32) = G_UMIN [[SUB2]], [[ADD1]] ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[UV1]], [[UMIN2]](s32) ; GFX6-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SHL1]](s64) @@ -609,8 +609,8 @@ ; GFX6-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[UV9]], [[UMIN3]] ; GFX6-NEXT: [[SITOFP1:%[0-9]+]]:_(s32) = G_SITOFP [[OR1]](s32) ; GFX6-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[C]], [[UMIN2]] - ; GFX6-NEXT: [[INT3:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.ldexp), [[SITOFP1]](s32), [[SUB3]](s32) - ; GFX6-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT3]](s32) + ; GFX6-NEXT: [[FLDEXP1:%[0-9]+]]:_(s32) = G_FLDEXP [[SITOFP1]], [[SUB3]](s32) + ; GFX6-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FLDEXP1]](s32) ; GFX6-NEXT: 
[[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC]](s16) ; GFX6-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16) ; GFX6-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 @@ -639,14 +639,14 @@ ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[UV5]], [[UMIN1]] ; GFX8-NEXT: [[SITOFP:%[0-9]+]]:_(s32) = G_SITOFP [[OR]](s32) ; GFX8-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C]], [[UMIN]] - ; GFX8-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.ldexp), [[SITOFP]](s32), [[SUB1]](s32) - ; GFX8-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT1]](s32) + ; GFX8-NEXT: [[FLDEXP:%[0-9]+]]:_(s32) = G_FLDEXP [[SITOFP]], [[SUB1]](s32) + ; GFX8-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FLDEXP]](s32) ; GFX8-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) ; GFX8-NEXT: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[UV6]], [[UV7]] ; GFX8-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[XOR1]], [[C2]](s32) ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[C]], [[ASHR1]] - ; GFX8-NEXT: [[INT2:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.sffbh), [[UV7]](s32) - ; GFX8-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[INT2]], [[C1]] + ; GFX8-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.sffbh), [[UV7]](s32) + ; GFX8-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[INT1]], [[C1]] ; GFX8-NEXT: [[UMIN2:%[0-9]+]]:_(s32) = G_UMIN [[SUB2]], [[ADD1]] ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[UV1]], [[UMIN2]](s32) ; GFX8-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SHL1]](s64) @@ -654,8 +654,8 @@ ; GFX8-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[UV9]], [[UMIN3]] ; GFX8-NEXT: [[SITOFP1:%[0-9]+]]:_(s32) = G_SITOFP [[OR1]](s32) ; GFX8-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[C]], [[UMIN2]] - ; GFX8-NEXT: [[INT3:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.ldexp), [[SITOFP1]](s32), [[SUB3]](s32) - ; GFX8-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT3]](s32) + ; GFX8-NEXT: [[FLDEXP1:%[0-9]+]]:_(s32) = G_FLDEXP [[SITOFP1]], [[SUB3]](s32) + ; GFX8-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FLDEXP1]](s32) ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC]](s16) ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16) ; GFX8-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 Index: llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-uitofp.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-uitofp.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-uitofp.mir @@ -98,8 +98,8 @@ ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[UV3]], [[UMIN1]] ; GFX6-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[OR]](s32) ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C]], [[UMIN]] - ; GFX6-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.ldexp), [[UITOFP]](s32), [[SUB]](s32) - ; GFX6-NEXT: $vgpr0 = COPY [[INT]](s32) + ; GFX6-NEXT: [[FLDEXP:%[0-9]+]]:_(s32) = G_FLDEXP [[UITOFP]], [[SUB]](s32) + ; GFX6-NEXT: $vgpr0 = COPY [[FLDEXP]](s32) ; GFX8-LABEL: name: test_uitofp_s64_to_s32 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} @@ -115,8 +115,8 @@ ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[UV3]], [[UMIN1]] ; GFX8-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[OR]](s32) ; GFX8-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C]], [[UMIN]] - ; GFX8-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.ldexp), [[UITOFP]](s32), [[SUB]](s32) - ; GFX8-NEXT: $vgpr0 = COPY [[INT]](s32) + ; GFX8-NEXT: [[FLDEXP:%[0-9]+]]:_(s32) = G_FLDEXP [[UITOFP]], [[SUB]](s32) + ; GFX8-NEXT: $vgpr0 = 
COPY [[FLDEXP]](s32) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s32) = G_UITOFP %0 $vgpr0 = COPY %1 @@ -136,8 +136,8 @@ ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 ; GFX6-NEXT: [[UITOFP:%[0-9]+]]:_(s64) = G_UITOFP [[UV1]](s32) ; GFX6-NEXT: [[UITOFP1:%[0-9]+]]:_(s64) = G_UITOFP [[UV]](s32) - ; GFX6-NEXT: [[INT:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ldexp), [[UITOFP]](s64), [[C]](s32) - ; GFX6-NEXT: [[FADD:%[0-9]+]]:_(s64) = G_FADD [[INT]], [[UITOFP1]] + ; GFX6-NEXT: [[FLDEXP:%[0-9]+]]:_(s64) = G_FLDEXP [[UITOFP]], [[C]](s32) + ; GFX6-NEXT: [[FADD:%[0-9]+]]:_(s64) = G_FADD [[FLDEXP]], [[UITOFP1]] ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[FADD]](s64) ; GFX8-LABEL: name: test_uitofp_s64_to_s64 ; GFX8: liveins: $vgpr0_vgpr1 @@ -147,8 +147,8 @@ ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 ; GFX8-NEXT: [[UITOFP:%[0-9]+]]:_(s64) = G_UITOFP [[UV1]](s32) ; GFX8-NEXT: [[UITOFP1:%[0-9]+]]:_(s64) = G_UITOFP [[UV]](s32) - ; GFX8-NEXT: [[INT:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ldexp), [[UITOFP]](s64), [[C]](s32) - ; GFX8-NEXT: [[FADD:%[0-9]+]]:_(s64) = G_FADD [[INT]], [[UITOFP1]] + ; GFX8-NEXT: [[FLDEXP:%[0-9]+]]:_(s64) = G_FLDEXP [[UITOFP]], [[C]](s32) + ; GFX8-NEXT: [[FADD:%[0-9]+]]:_(s64) = G_FADD [[FLDEXP]], [[UITOFP1]] ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[FADD]](s64) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s64) = G_UITOFP %0 @@ -444,8 +444,8 @@ ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[UV3]], [[UMIN1]] ; GFX6-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[OR]](s32) ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[UMIN]] - ; GFX6-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.ldexp), [[UITOFP]](s32), [[SUB]](s32) - ; GFX6-NEXT: $vgpr0 = COPY [[INT]](s32) + ; GFX6-NEXT: [[FLDEXP:%[0-9]+]]:_(s32) = G_FLDEXP [[UITOFP]], [[SUB]](s32) + ; GFX6-NEXT: $vgpr0 = COPY [[FLDEXP]](s32) ; GFX8-LABEL: name: test_uitofp_s33_to_s32 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} @@ -463,8 +463,8 @@ ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[UV3]], [[UMIN1]] ; GFX8-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[OR]](s32) ; GFX8-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[UMIN]] - ; GFX8-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.ldexp), [[UITOFP]](s32), [[SUB]](s32) - ; GFX8-NEXT: $vgpr0 = COPY [[INT]](s32) + ; GFX8-NEXT: [[FLDEXP:%[0-9]+]]:_(s32) = G_FLDEXP [[UITOFP]], [[SUB]](s32) + ; GFX8-NEXT: $vgpr0 = COPY [[FLDEXP]](s32) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s33) = G_TRUNC %0 %2:_(s32) = G_UITOFP %1 @@ -492,8 +492,8 @@ ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[UV3]], [[UMIN1]] ; GFX6-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[OR]](s32) ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C]], [[UMIN]] - ; GFX6-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.ldexp), [[UITOFP]](s32), [[SUB]](s32) - ; GFX6-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT]](s32) + ; GFX6-NEXT: [[FLDEXP:%[0-9]+]]:_(s32) = G_FLDEXP [[UITOFP]], [[SUB]](s32) + ; GFX6-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FLDEXP]](s32) ; GFX6-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16) ; GFX6-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX8-LABEL: name: test_uitofp_s64_to_s16 @@ -511,8 +511,8 @@ ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[UV3]], [[UMIN1]] ; GFX8-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[OR]](s32) ; GFX8-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C]], [[UMIN]] - ; GFX8-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.ldexp), [[UITOFP]](s32), [[SUB]](s32) - ; GFX8-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC 
[[INT]](s32) + ; GFX8-NEXT: [[FLDEXP:%[0-9]+]]:_(s32) = G_FLDEXP [[UITOFP]], [[SUB]](s32) + ; GFX8-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FLDEXP]](s32) ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16) ; GFX8-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) %0:_(s64) = COPY $vgpr0_vgpr1 @@ -543,8 +543,8 @@ ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[UV5]], [[UMIN1]] ; GFX6-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[OR]](s32) ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C]], [[UMIN]] - ; GFX6-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.ldexp), [[UITOFP]](s32), [[SUB]](s32) - ; GFX6-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT]](s32) + ; GFX6-NEXT: [[FLDEXP:%[0-9]+]]:_(s32) = G_FLDEXP [[UITOFP]], [[SUB]](s32) + ; GFX6-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FLDEXP]](s32) ; GFX6-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) ; GFX6-NEXT: [[AMDGPU_FFBH_U32_1:%[0-9]+]]:_(s32) = G_AMDGPU_FFBH_U32 [[UV7]](s32) ; GFX6-NEXT: [[UMIN2:%[0-9]+]]:_(s32) = G_UMIN [[AMDGPU_FFBH_U32_1]], [[C]] @@ -554,8 +554,8 @@ ; GFX6-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[UV9]], [[UMIN3]] ; GFX6-NEXT: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[OR1]](s32) ; GFX6-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C]], [[UMIN2]] - ; GFX6-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.ldexp), [[UITOFP1]](s32), [[SUB1]](s32) - ; GFX6-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT1]](s32) + ; GFX6-NEXT: [[FLDEXP1:%[0-9]+]]:_(s32) = G_FLDEXP [[UITOFP1]], [[SUB1]](s32) + ; GFX6-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FLDEXP1]](s32) ; GFX6-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC]](s16) ; GFX6-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16) ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 @@ -579,8 +579,8 @@ ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[UV5]], [[UMIN1]] ; GFX8-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[OR]](s32) ; GFX8-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C]], [[UMIN]] - ; GFX8-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.ldexp), [[UITOFP]](s32), [[SUB]](s32) - ; GFX8-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT]](s32) + ; GFX8-NEXT: [[FLDEXP:%[0-9]+]]:_(s32) = G_FLDEXP [[UITOFP]], [[SUB]](s32) + ; GFX8-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FLDEXP]](s32) ; GFX8-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) ; GFX8-NEXT: [[AMDGPU_FFBH_U32_1:%[0-9]+]]:_(s32) = G_AMDGPU_FFBH_U32 [[UV7]](s32) ; GFX8-NEXT: [[UMIN2:%[0-9]+]]:_(s32) = G_UMIN [[AMDGPU_FFBH_U32_1]], [[C]] @@ -590,8 +590,8 @@ ; GFX8-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[UV9]], [[UMIN3]] ; GFX8-NEXT: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[OR1]](s32) ; GFX8-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C]], [[UMIN2]] - ; GFX8-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.ldexp), [[UITOFP1]](s32), [[SUB1]](s32) - ; GFX8-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT1]](s32) + ; GFX8-NEXT: [[FLDEXP1:%[0-9]+]]:_(s32) = G_FLDEXP [[UITOFP1]], [[SUB1]](s32) + ; GFX8-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FLDEXP1]](s32) ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC]](s16) ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16) ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 Index: llvm/test/CodeGen/AMDGPU/llvm.ldexp.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/AMDGPU/llvm.ldexp.ll @@ -0,0 +1,640 @@ +; NOTE: Assertions have been autogenerated by 
utils/update_llc_test_checks.py UTC_ARGS: --version 2 +; RUN: llc -global-isel=0 -march=amdgcn -mcpu=tahiti < %s | FileCheck -check-prefixes=GCN,GFX6,GFX6-SDAG %s +; RUN: llc -global-isel=0 -march=amdgcn -mcpu=tonga < %s | FileCheck -check-prefixes=GCN,GFX8,GFX8-SDAG %s +; RUN: llc -global-isel=0 -march=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9,GFX9-SDAG %s +; RUN: llc -global-isel=0 -march=amdgcn -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GCN,GFX11,GFX11-SDAG %s + +; RUN: llc -global-isel=1 -march=amdgcn -mcpu=tahiti < %s | FileCheck -check-prefixes=GCN,GFX6,GFX6-GISEL %s +; RUN: llc -global-isel=1 -march=amdgcn -mcpu=tonga < %s | FileCheck -check-prefixes=GCN,GFX8,GFX8-GISEL %s +; RUN: llc -global-isel=1 -march=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9,GFX9-GISEL %s +; RUN: llc -global-isel=1 -march=amdgcn -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GCN,GFX11,GFX11-GISEL %s + +define float @test_ldexp_f32_i32(ptr addrspace(1) %out, float %a, i32 %b) { +; GFX6-LABEL: test_ldexp_f32_i32: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-NEXT: v_ldexp_f32_e32 v0, v2, v3 +; GFX6-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: test_ldexp_f32_i32: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_ldexp_f32 v0, v2, v3 +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: test_ldexp_f32_i32: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_ldexp_f32 v0, v2, v3 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: test_ldexp_f32_i32: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: v_ldexp_f32 v0, v2, v3 +; GFX11-NEXT: s_setpc_b64 s[30:31] + %result = call float @llvm.ldexp.f32.i32(float %a, i32 %b) + ret float %result +} + +define <2 x float> @test_ldexp_v2f32_v2i32(ptr addrspace(1) %out, <2 x float> %a, <2 x i32> %b) { +; GFX6-LABEL: test_ldexp_v2f32_v2i32: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-NEXT: v_ldexp_f32_e32 v0, v2, v4 +; GFX6-NEXT: v_ldexp_f32_e32 v1, v3, v5 +; GFX6-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: test_ldexp_v2f32_v2i32: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_ldexp_f32 v0, v2, v4 +; GFX8-NEXT: v_ldexp_f32 v1, v3, v5 +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: test_ldexp_v2f32_v2i32: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_ldexp_f32 v0, v2, v4 +; GFX9-NEXT: v_ldexp_f32 v1, v3, v5 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: test_ldexp_v2f32_v2i32: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: v_ldexp_f32 v0, v2, v4 +; GFX11-NEXT: v_ldexp_f32 v1, v3, v5 +; GFX11-NEXT: s_setpc_b64 s[30:31] + %result = call <2 x float> @llvm.ldexp.v2f32.v2i32(<2 x float> %a, <2 x i32> %b) + ret <2 x float> %result +} + +define <3 x float> @test_ldexp_v3f32_v3i32(ptr addrspace(1) %out, <3 x float> %a, <3 x i32> %b) { +; GFX6-LABEL: test_ldexp_v3f32_v3i32: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-NEXT: v_ldexp_f32_e32 v0, v2, v5 +; GFX6-NEXT: v_ldexp_f32_e32 v1, v3, v6 +; GFX6-NEXT: v_ldexp_f32_e32 v2, v4, v7 +; GFX6-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: test_ldexp_v3f32_v3i32: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_ldexp_f32 v0, v2, v5 +; 
GFX8-NEXT: v_ldexp_f32 v1, v3, v6 +; GFX8-NEXT: v_ldexp_f32 v2, v4, v7 +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: test_ldexp_v3f32_v3i32: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_ldexp_f32 v0, v2, v5 +; GFX9-NEXT: v_ldexp_f32 v1, v3, v6 +; GFX9-NEXT: v_ldexp_f32 v2, v4, v7 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: test_ldexp_v3f32_v3i32: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: v_ldexp_f32 v0, v2, v5 +; GFX11-NEXT: v_ldexp_f32 v1, v3, v6 +; GFX11-NEXT: v_ldexp_f32 v2, v4, v7 +; GFX11-NEXT: s_setpc_b64 s[30:31] + %result = call <3 x float> @llvm.ldexp.v3f32.v3i32(<3 x float> %a, <3 x i32> %b) + ret <3 x float> %result +} + +define <4 x float> @test_ldexp_v4f32_v4i32(ptr addrspace(1) %out, <4 x float> %a, <4 x i32> %b) { +; GFX6-LABEL: test_ldexp_v4f32_v4i32: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-NEXT: v_ldexp_f32_e32 v0, v2, v6 +; GFX6-NEXT: v_ldexp_f32_e32 v1, v3, v7 +; GFX6-NEXT: v_ldexp_f32_e32 v2, v4, v8 +; GFX6-NEXT: v_ldexp_f32_e32 v3, v5, v9 +; GFX6-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: test_ldexp_v4f32_v4i32: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_ldexp_f32 v0, v2, v6 +; GFX8-NEXT: v_ldexp_f32 v1, v3, v7 +; GFX8-NEXT: v_ldexp_f32 v2, v4, v8 +; GFX8-NEXT: v_ldexp_f32 v3, v5, v9 +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: test_ldexp_v4f32_v4i32: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_ldexp_f32 v0, v2, v6 +; GFX9-NEXT: v_ldexp_f32 v1, v3, v7 +; GFX9-NEXT: v_ldexp_f32 v2, v4, v8 +; GFX9-NEXT: v_ldexp_f32 v3, v5, v9 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: test_ldexp_v4f32_v4i32: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: v_ldexp_f32 v0, v2, v6 +; GFX11-NEXT: v_ldexp_f32 v1, v3, v7 +; GFX11-NEXT: v_ldexp_f32 v2, v4, v8 +; GFX11-NEXT: v_ldexp_f32 v3, v5, v9 +; GFX11-NEXT: s_setpc_b64 s[30:31] + %result = call <4 x float> @llvm.ldexp.v4f32.v4i32(<4 x float> %a, <4 x i32> %b) + ret <4 x float> %result +} + +define double @test_ldexp_f64_i32(double %a, i32 %b) { +; GFX6-LABEL: test_ldexp_f64_i32: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2 +; GFX6-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: test_ldexp_f64_i32: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2 +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: test_ldexp_f64_i32: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: test_ldexp_f64_i32: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2 +; GFX11-NEXT: s_setpc_b64 s[30:31] + %result = call double @llvm.ldexp.f64.i32(double %a, i32 %b) + ret double %result +} + +define <2 x double> @test_ldexp_v2f64_v2i32(<2 x double> %a, <2 x i32> %b) { +; GFX6-LABEL: test_ldexp_v2f64_v2i32: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-NEXT: v_ldexp_f64 v[0:1], v[0:1], v4 +; GFX6-NEXT: v_ldexp_f64 v[2:3], v[2:3], v5 +; GFX6-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: test_ldexp_v2f64_v2i32: 
+; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_ldexp_f64 v[0:1], v[0:1], v4 +; GFX8-NEXT: v_ldexp_f64 v[2:3], v[2:3], v5 +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: test_ldexp_v2f64_v2i32: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_ldexp_f64 v[0:1], v[0:1], v4 +; GFX9-NEXT: v_ldexp_f64 v[2:3], v[2:3], v5 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: test_ldexp_v2f64_v2i32: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: v_ldexp_f64 v[0:1], v[0:1], v4 +; GFX11-NEXT: v_ldexp_f64 v[2:3], v[2:3], v5 +; GFX11-NEXT: s_setpc_b64 s[30:31] + %result = call <2 x double> @llvm.ldexp.v2f64.v2i32(<2 x double> %a, <2 x i32> %b) + ret <2 x double> %result +} + +; Broken for DAG +; define float @test_ldexp_f32_i16(float %a, i16 %b) { +; %result = call float @llvm.ldexp.f32.i16(float %a, i16 %b) +; ret float %result +; } + +; FIXME: Should be able to truncate to i32 +; define float @test_ldexp_f32_i64(float %a, i64 %b) { +; %result = call float @llvm.ldexp.f32.i64(float %a, i64 %b) +; ret float %result +; } + +; define <2 x float> @test_ldexp_v2f32_v2i16(<2 x float> %a, <2 x i16> %b) { +; %result = call <2 x float> @llvm.ldexp.v2f32.v2i16(<2 x float> %a, <2 x i16> %b) +; ret <2 x float> %result +; } + +; FIXME: Should be able to truncate to i32 +; define <2 x float> @test_ldexp_v2f32_v2i64(<2 x float> %a, <2 x i64> %b) { +; %result = call <2 x float> @llvm.ldexp.v2f32.v2i64(<2 x float> %a, <2 x i64> %b) +; ret <2 x float> %result +; } + +define half @test_ldexp_f16_i8(half %a, i8 %b) { +; GFX6-SDAG-LABEL: test_ldexp_f16_i8: +; GFX6-SDAG: ; %bb.0: +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX6-SDAG-NEXT: v_bfe_i32 v1, v1, 0, 8 +; GFX6-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX6-SDAG-NEXT: v_ldexp_f32_e32 v0, v0, v1 +; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-SDAG-LABEL: test_ldexp_f16_i8: +; GFX8-SDAG: ; %bb.0: +; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-SDAG-NEXT: v_ldexp_f16_sdwa v0, v0, sext(v1) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 +; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-SDAG-LABEL: test_ldexp_f16_i8: +; GFX9-SDAG: ; %bb.0: +; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-SDAG-NEXT: v_ldexp_f16_sdwa v0, v0, sext(v1) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 +; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-SDAG-LABEL: test_ldexp_f16_i8: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-SDAG-NEXT: v_bfe_i32 v1, v1, 0, 8 +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-SDAG-NEXT: v_ldexp_f16_e32 v0, v0, v1 +; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX6-GISEL-LABEL: test_ldexp_f16_i8: +; GFX6-GISEL: ; %bb.0: +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX6-GISEL-NEXT: v_bfe_i32 v1, v1, 0, 8 +; GFX6-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v1 +; GFX6-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-GISEL-LABEL: test_ldexp_f16_i8: +; GFX8-GISEL: ; %bb.0: +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-GISEL-NEXT: v_bfe_i32 v1, v1, 0, 8 +; GFX8-GISEL-NEXT: v_mov_b32_e32 v2, 0xffff8000 +; GFX8-GISEL-NEXT: v_mov_b32_e32 v3, 0x7fff +; 
GFX8-GISEL-NEXT: v_med3_i32 v1, v1, v2, v3 +; GFX8-GISEL-NEXT: v_ldexp_f16_e32 v0, v0, v1 +; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-GISEL-LABEL: test_ldexp_f16_i8: +; GFX9-GISEL: ; %bb.0: +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-GISEL-NEXT: v_bfe_i32 v1, v1, 0, 8 +; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0xffff8000 +; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0x7fff +; GFX9-GISEL-NEXT: v_med3_i32 v1, v1, v2, v3 +; GFX9-GISEL-NEXT: v_ldexp_f16_e32 v0, v0, v1 +; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-GISEL-LABEL: test_ldexp_f16_i8: +; GFX11-GISEL: ; %bb.0: +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-GISEL-NEXT: v_bfe_i32 v1, v1, 0, 8 +; GFX11-GISEL-NEXT: v_mov_b32_e32 v2, 0x7fff +; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-GISEL-NEXT: v_med3_i32 v1, 0xffff8000, v1, v2 +; GFX11-GISEL-NEXT: v_ldexp_f16_e32 v0, v0, v1 +; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] + %result = call half @llvm.ldexp.f16.i8(half %a, i8 %b) + ret half %result +} + +define half @test_ldexp_f16_i16(half %a, i16 %b) { +; GFX6-SDAG-LABEL: test_ldexp_f16_i16: +; GFX6-SDAG: ; %bb.0: +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX6-SDAG-NEXT: v_bfe_i32 v1, v1, 0, 16 +; GFX6-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX6-SDAG-NEXT: v_ldexp_f32_e32 v0, v0, v1 +; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: test_ldexp_f16_i16: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_ldexp_f16_e32 v0, v0, v1 +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: test_ldexp_f16_i16: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_ldexp_f16_e32 v0, v0, v1 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: test_ldexp_f16_i16: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: v_ldexp_f16_e32 v0, v0, v1 +; GFX11-NEXT: s_setpc_b64 s[30:31] +; +; GFX6-GISEL-LABEL: test_ldexp_f16_i16: +; GFX6-GISEL: ; %bb.0: +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX6-GISEL-NEXT: v_bfe_i32 v1, v1, 0, 16 +; GFX6-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v1 +; GFX6-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31] + %result = call half @llvm.ldexp.f16.i16(half %a, i16 %b) + ret half %result +} + +define half @test_ldexp_f16_i32(half %a, i32 %b) { +; GFX6-SDAG-LABEL: test_ldexp_f16_i32: +; GFX6-SDAG: ; %bb.0: +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX6-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX6-SDAG-NEXT: v_ldexp_f32_e32 v0, v0, v1 +; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-SDAG-LABEL: test_ldexp_f16_i32: +; GFX8-SDAG: ; %bb.0: +; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-SDAG-NEXT: s_movk_i32 s4, 0x8000 +; GFX8-SDAG-NEXT: v_mov_b32_e32 v2, 0x7fff +; GFX8-SDAG-NEXT: v_med3_i32 v1, v1, s4, v2 +; GFX8-SDAG-NEXT: v_ldexp_f16_e32 v0, v0, v1 +; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-SDAG-LABEL: test_ldexp_f16_i32: +; GFX9-SDAG: ; %bb.0: +; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-SDAG-NEXT: s_movk_i32 s4, 0x8000 +; GFX9-SDAG-NEXT: v_mov_b32_e32 v2, 0x7fff +; GFX9-SDAG-NEXT: v_med3_i32 v1, v1, s4, v2 +; GFX9-SDAG-NEXT: v_ldexp_f16_e32 v0, v0, v1 +; GFX9-SDAG-NEXT: 
s_setpc_b64 s[30:31] +; +; GFX11-SDAG-LABEL: test_ldexp_f16_i32: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-SDAG-NEXT: s_movk_i32 s0, 0x8000 +; GFX11-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-SDAG-NEXT: v_med3_i32 v1, v1, s0, 0x7fff +; GFX11-SDAG-NEXT: v_ldexp_f16_e32 v0, v0, v1 +; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX6-GISEL-LABEL: test_ldexp_f16_i32: +; GFX6-GISEL: ; %bb.0: +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX6-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v1 +; GFX6-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-GISEL-LABEL: test_ldexp_f16_i32: +; GFX8-GISEL: ; %bb.0: +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-GISEL-NEXT: v_mov_b32_e32 v2, 0xffff8000 +; GFX8-GISEL-NEXT: v_mov_b32_e32 v3, 0x7fff +; GFX8-GISEL-NEXT: v_med3_i32 v1, v1, v2, v3 +; GFX8-GISEL-NEXT: v_ldexp_f16_e32 v0, v0, v1 +; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-GISEL-LABEL: test_ldexp_f16_i32: +; GFX9-GISEL: ; %bb.0: +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0xffff8000 +; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0x7fff +; GFX9-GISEL-NEXT: v_med3_i32 v1, v1, v2, v3 +; GFX9-GISEL-NEXT: v_ldexp_f16_e32 v0, v0, v1 +; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-GISEL-LABEL: test_ldexp_f16_i32: +; GFX11-GISEL: ; %bb.0: +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-GISEL-NEXT: v_mov_b32_e32 v2, 0x7fff +; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-GISEL-NEXT: v_med3_i32 v1, 0xffff8000, v1, v2 +; GFX11-GISEL-NEXT: v_ldexp_f16_e32 v0, v0, v1 +; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] + %result = call half @llvm.ldexp.f16.i32(half %a, i32 %b) + ret half %result +} + +define <2 x half> @test_ldexp_v2f16_v2i32(<2 x half> %a, <2 x i32> %b) { +; GFX6-SDAG-LABEL: test_ldexp_v2f16_v2i32: +; GFX6-SDAG: ; %bb.0: +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX6-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX6-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX6-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX6-SDAG-NEXT: v_ldexp_f32_e32 v0, v0, v2 +; GFX6-SDAG-NEXT: v_ldexp_f32_e32 v1, v1, v3 +; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-SDAG-LABEL: test_ldexp_v2f16_v2i32: +; GFX8-SDAG: ; %bb.0: +; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-SDAG-NEXT: s_movk_i32 s4, 0x8000 +; GFX8-SDAG-NEXT: v_mov_b32_e32 v3, 0x7fff +; GFX8-SDAG-NEXT: v_med3_i32 v2, v2, s4, v3 +; GFX8-SDAG-NEXT: v_med3_i32 v1, v1, s4, v3 +; GFX8-SDAG-NEXT: v_ldexp_f16_sdwa v2, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX8-SDAG-NEXT: v_ldexp_f16_e32 v0, v0, v1 +; GFX8-SDAG-NEXT: v_or_b32_e32 v0, v0, v2 +; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-SDAG-LABEL: test_ldexp_v2f16_v2i32: +; GFX9-SDAG: ; %bb.0: +; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-SDAG-NEXT: s_movk_i32 s4, 0x8000 +; GFX9-SDAG-NEXT: v_mov_b32_e32 v3, 0x7fff +; GFX9-SDAG-NEXT: v_med3_i32 v2, v2, s4, v3 +; GFX9-SDAG-NEXT: v_med3_i32 v1, v1, s4, v3 +; GFX9-SDAG-NEXT: v_ldexp_f16_sdwa v2, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX9-SDAG-NEXT: v_ldexp_f16_e32 v0, v0, v1 +; 
GFX9-SDAG-NEXT: v_pack_b32_f16 v0, v0, v2 +; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-SDAG-LABEL: test_ldexp_v2f16_v2i32: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-SDAG-NEXT: s_movk_i32 s0, 0x8000 +; GFX11-SDAG-NEXT: v_lshrrev_b32_e32 v3, 16, v0 +; GFX11-SDAG-NEXT: v_med3_i32 v2, v2, s0, 0x7fff +; GFX11-SDAG-NEXT: v_med3_i32 v1, v1, s0, 0x7fff +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-SDAG-NEXT: v_ldexp_f16_e32 v2, v3, v2 +; GFX11-SDAG-NEXT: v_ldexp_f16_e32 v0, v0, v1 +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-SDAG-NEXT: v_pack_b32_f16 v0, v0, v2 +; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX6-GISEL-LABEL: test_ldexp_v2f16_v2i32: +; GFX6-GISEL: ; %bb.0: +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX6-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX6-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v2 +; GFX6-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v3 +; GFX6-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX6-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-GISEL-LABEL: test_ldexp_v2f16_v2i32: +; GFX8-GISEL: ; %bb.0: +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-GISEL-NEXT: v_mov_b32_e32 v3, 0xffff8000 +; GFX8-GISEL-NEXT: v_mov_b32_e32 v4, 0x7fff +; GFX8-GISEL-NEXT: v_med3_i32 v1, v1, v3, v4 +; GFX8-GISEL-NEXT: v_med3_i32 v2, v2, v3, v4 +; GFX8-GISEL-NEXT: v_ldexp_f16_e32 v1, v0, v1 +; GFX8-GISEL-NEXT: v_ldexp_f16_sdwa v0, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v1, v0 +; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-GISEL-LABEL: test_ldexp_v2f16_v2i32: +; GFX9-GISEL: ; %bb.0: +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0xffff8000 +; GFX9-GISEL-NEXT: v_mov_b32_e32 v4, 0x7fff +; GFX9-GISEL-NEXT: v_med3_i32 v1, v1, v3, v4 +; GFX9-GISEL-NEXT: v_med3_i32 v2, v2, v3, v4 +; GFX9-GISEL-NEXT: v_ldexp_f16_e32 v1, v0, v1 +; GFX9-GISEL-NEXT: v_ldexp_f16_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX9-GISEL-NEXT: v_lshl_or_b32 v0, v0, 16, v1 +; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-GISEL-LABEL: test_ldexp_v2f16_v2i32: +; GFX11-GISEL: ; %bb.0: +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-GISEL-NEXT: v_mov_b32_e32 v3, 0x7fff +; GFX11-GISEL-NEXT: v_lshrrev_b32_e32 v4, 16, v0 +; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX11-GISEL-NEXT: v_med3_i32 v1, 0xffff8000, v1, v3 +; GFX11-GISEL-NEXT: v_med3_i32 v2, 0xffff8000, v2, v3 +; GFX11-GISEL-NEXT: v_ldexp_f16_e32 v0, v0, v1 +; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-GISEL-NEXT: v_ldexp_f16_e32 v1, v4, v2 +; GFX11-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-GISEL-NEXT: v_lshl_or_b32 v0, v1, 16, v0 +; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] + %result = call <2 x half> @llvm.ldexp.v2f16.v2i32(<2 x half> %a, <2 x i32> %b) + ret <2 x half> %result +} + +define <2 x half> @test_ldexp_v2f16_v2i16(<2 x half> %a, <2 x i16> %b) { +; GFX6-SDAG-LABEL: test_ldexp_v2f16_v2i16: +; GFX6-SDAG: ; %bb.0: +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-SDAG-NEXT: 
v_cvt_f16_f32_e32 v0, v0 +; GFX6-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX6-SDAG-NEXT: v_bfe_i32 v3, v3, 0, 16 +; GFX6-SDAG-NEXT: v_bfe_i32 v2, v2, 0, 16 +; GFX6-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX6-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX6-SDAG-NEXT: v_ldexp_f32_e32 v0, v0, v2 +; GFX6-SDAG-NEXT: v_ldexp_f32_e32 v1, v1, v3 +; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-SDAG-LABEL: test_ldexp_v2f16_v2i16: +; GFX8-SDAG: ; %bb.0: +; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-SDAG-NEXT: v_ldexp_f16_sdwa v2, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-SDAG-NEXT: v_ldexp_f16_e32 v0, v0, v1 +; GFX8-SDAG-NEXT: v_or_b32_e32 v0, v0, v2 +; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-SDAG-LABEL: test_ldexp_v2f16_v2i16: +; GFX9-SDAG: ; %bb.0: +; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-SDAG-NEXT: v_ldexp_f16_sdwa v2, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX9-SDAG-NEXT: v_ldexp_f16_e32 v0, v0, v1 +; GFX9-SDAG-NEXT: v_pack_b32_f16 v0, v0, v2 +; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-SDAG-LABEL: test_ldexp_v2f16_v2i16: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-SDAG-NEXT: v_lshrrev_b32_e32 v2, 16, v1 +; GFX11-SDAG-NEXT: v_lshrrev_b32_e32 v3, 16, v0 +; GFX11-SDAG-NEXT: v_ldexp_f16_e32 v0, v0, v1 +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-SDAG-NEXT: v_ldexp_f16_e32 v2, v3, v2 +; GFX11-SDAG-NEXT: v_pack_b32_f16 v0, v0, v2 +; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX6-GISEL-LABEL: test_ldexp_v2f16_v2i16: +; GFX6-GISEL: ; %bb.0: +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX6-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX6-GISEL-NEXT: v_bfe_i32 v2, v2, 0, 16 +; GFX6-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v2 +; GFX6-GISEL-NEXT: v_bfe_i32 v2, v3, 0, 16 +; GFX6-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2 +; GFX6-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX6-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-GISEL-LABEL: test_ldexp_v2f16_v2i16: +; GFX8-GISEL: ; %bb.0: +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-GISEL-NEXT: v_ldexp_f16_e32 v2, v0, v1 +; GFX8-GISEL-NEXT: v_ldexp_f16_sdwa v0, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v2, v0 +; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-GISEL-LABEL: test_ldexp_v2f16_v2i16: +; GFX9-GISEL: ; %bb.0: +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-GISEL-NEXT: v_ldexp_f16_e32 v2, v0, v1 +; GFX9-GISEL-NEXT: v_ldexp_f16_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX9-GISEL-NEXT: v_lshl_or_b32 v0, v0, 16, v2 +; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-GISEL-LABEL: test_ldexp_v2f16_v2i16: +; GFX11-GISEL: ; %bb.0: +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-GISEL-NEXT: v_lshrrev_b32_e32 v2, 16, v0 +; GFX11-GISEL-NEXT: v_lshrrev_b32_e32 v3, 16, v1 +; GFX11-GISEL-NEXT: v_ldexp_f16_e32 v0, v0, v1 +; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-GISEL-NEXT: v_ldexp_f16_e32 v1, v2, v3 +; GFX11-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; 
GFX11-GISEL-NEXT: v_lshl_or_b32 v0, v1, 16, v0 +; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] + %result = call <2 x half> @llvm.ldexp.v2f16.v2i16(<2 x half> %a, <2 x i16> %b) + ret <2 x half> %result +} + +declare float @llvm.ldexp.f32.i32(float, i32) #0 +declare float @llvm.ldexp.f32.i16(float, i16) #0 +declare float @llvm.ldexp.f32.i64(float, i64) #0 +declare half @llvm.ldexp.f16.i8(half, i8) #0 +declare half @llvm.ldexp.f16.i16(half, i16) #0 +declare half @llvm.ldexp.f16.i32(half, i32) #0 +declare <2 x half> @llvm.ldexp.v2f16.v2i16(<2 x half>, <2 x i16>) #0 +declare <2 x half> @llvm.ldexp.v2f16.v2i32(<2 x half>, <2 x i32>) #0 +declare <2 x float> @llvm.ldexp.v2f32.v2i32(<2 x float>, <2 x i32>) #0 +declare <2 x float> @llvm.ldexp.v2f32.v2i16(<2 x float>, <2 x i16>) #0 +declare <2 x float> @llvm.ldexp.v2f32.v2i64(<2 x float>, <2 x i64>) #0 +declare <3 x float> @llvm.ldexp.v3f32.v3i32(<3 x float>, <3 x i32>) #0 +declare <4 x float> @llvm.ldexp.v4f32.v4i32(<4 x float>, <4 x i32>) #0 +declare double @llvm.ldexp.f64.i32(double, i32) #0 +declare <2 x double> @llvm.ldexp.v2f64.v2i32(<2 x double>, <2 x i32>) #0 + +attributes #0 = { nounwind readnone } +;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +; GCN: {{.*}} Index: llvm/test/CodeGen/AMDGPU/strict_ldexp.f16.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/AMDGPU/strict_ldexp.f16.ll @@ -0,0 +1,400 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 +; FIXME: Enable f16 promotion +; XUN: llc -global-isel=0 -march=amdgcn -mcpu=tahiti < %s | FileCheck -check-prefixes=GCN,GFX6,GFX6-SDAG %s +; RUN: llc -global-isel=0 -march=amdgcn -mcpu=tonga < %s | FileCheck -check-prefixes=GCN,GFX8,GFX8-SDAG %s +; RUN: llc -global-isel=0 -march=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9,GFX9-SDAG %s +; RUN: llc -global-isel=0 -march=amdgcn -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GCN,GFX11,GFX11-SDAG %s + +; XUN: llc -global-isel=1 -march=amdgcn -mcpu=tahiti < %s | FileCheck -check-prefixes=GCN,GFX6,GFX6-GISEL %s +; RUN: llc -global-isel=1 -march=amdgcn -mcpu=tonga < %s | FileCheck -check-prefixes=GCN,GFX8,GFX8-GISEL %s +; RUN: llc -global-isel=1 -march=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9,GFX9-GISEL %s +; RUN: llc -global-isel=1 -march=amdgcn -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GCN,GFX11,GFX11-GISEL %s + +; define half @test_ldexp_f16_i16(ptr addrspace(1) %out, half %a, i16 %b) #0 { +; %result = call half @llvm.experimental.constrained.ldexp.f16.i16(half %a, i16 %b, metadata !"round.dynamic", metadata !"fpexcept.strict") +; ret half %result +; } + +define half @test_ldexp_f16_i32(ptr addrspace(1) %out, half %a, i32 %b) #0 { +; GFX8-SDAG-LABEL: test_ldexp_f16_i32: +; GFX8-SDAG: ; %bb.0: +; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-SDAG-NEXT: s_movk_i32 s4, 0x8000 +; GFX8-SDAG-NEXT: v_mov_b32_e32 v0, 0x7fff +; GFX8-SDAG-NEXT: v_med3_i32 v0, v3, s4, v0 +; GFX8-SDAG-NEXT: v_ldexp_f16_e32 v0, v2, v0 +; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-SDAG-LABEL: test_ldexp_f16_i32: +; GFX9-SDAG: ; %bb.0: +; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-SDAG-NEXT: s_movk_i32 s4, 0x8000 +; GFX9-SDAG-NEXT: v_mov_b32_e32 v0, 0x7fff +; GFX9-SDAG-NEXT: v_med3_i32 v0, v3, s4, v0 +; GFX9-SDAG-NEXT: v_ldexp_f16_e32 v0, v2, v0 +; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-SDAG-LABEL: test_ldexp_f16_i32: +; 
GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-SDAG-NEXT: s_movk_i32 s0, 0x8000 +; GFX11-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-SDAG-NEXT: v_med3_i32 v0, v3, s0, 0x7fff +; GFX11-SDAG-NEXT: v_ldexp_f16_e32 v0, v2, v0 +; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-GISEL-LABEL: test_ldexp_f16_i32: +; GFX8-GISEL: ; %bb.0: +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-GISEL-NEXT: v_mov_b32_e32 v0, 0xffff8000 +; GFX8-GISEL-NEXT: v_mov_b32_e32 v1, 0x7fff +; GFX8-GISEL-NEXT: v_med3_i32 v0, v3, v0, v1 +; GFX8-GISEL-NEXT: v_ldexp_f16_e32 v0, v2, v0 +; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-GISEL-LABEL: test_ldexp_f16_i32: +; GFX9-GISEL: ; %bb.0: +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, 0xffff8000 +; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, 0x7fff +; GFX9-GISEL-NEXT: v_med3_i32 v0, v3, v0, v1 +; GFX9-GISEL-NEXT: v_ldexp_f16_e32 v0, v2, v0 +; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-GISEL-LABEL: test_ldexp_f16_i32: +; GFX11-GISEL: ; %bb.0: +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-GISEL-NEXT: v_mov_b32_e32 v0, 0x7fff +; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-GISEL-NEXT: v_med3_i32 v0, 0xffff8000, v3, v0 +; GFX11-GISEL-NEXT: v_ldexp_f16_e32 v0, v2, v0 +; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] + %result = call half @llvm.experimental.constrained.ldexp.f16.i32(half %a, i32 %b, metadata !"round.dynamic", metadata !"fpexcept.strict") + ret half %result +} + +; define <2 x half> @test_ldexp_v2f16_v2i16(ptr addrspace(1) %out, <2 x half> %a, <2 x i16> %b) #0 { +; %result = call <2 x half> @llvm.experimental.constrained.ldexp.v2f16.v2i16(<2 x half> %a, <2 x i16> %b, metadata !"round.dynamic", metadata !"fpexcept.strict") +; ret <2 x half> %result +; } + +define <2 x half> @test_ldexp_v2f16_v2i32(ptr addrspace(1) %out, <2 x half> %a, <2 x i32> %b) #0 { +; GFX8-SDAG-LABEL: test_ldexp_v2f16_v2i32: +; GFX8-SDAG: ; %bb.0: +; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-SDAG-NEXT: s_movk_i32 s4, 0x8000 +; GFX8-SDAG-NEXT: v_mov_b32_e32 v0, 0x7fff +; GFX8-SDAG-NEXT: v_med3_i32 v1, v3, s4, v0 +; GFX8-SDAG-NEXT: v_med3_i32 v0, v4, s4, v0 +; GFX8-SDAG-NEXT: v_ldexp_f16_e32 v1, v2, v1 +; GFX8-SDAG-NEXT: v_ldexp_f16_sdwa v0, v2, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX8-SDAG-NEXT: v_or_b32_e32 v0, v1, v0 +; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-SDAG-LABEL: test_ldexp_v2f16_v2i32: +; GFX9-SDAG: ; %bb.0: +; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-SDAG-NEXT: s_movk_i32 s4, 0x8000 +; GFX9-SDAG-NEXT: v_mov_b32_e32 v0, 0x7fff +; GFX9-SDAG-NEXT: v_med3_i32 v1, v3, s4, v0 +; GFX9-SDAG-NEXT: v_med3_i32 v0, v4, s4, v0 +; GFX9-SDAG-NEXT: v_ldexp_f16_e32 v1, v2, v1 +; GFX9-SDAG-NEXT: v_ldexp_f16_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX9-SDAG-NEXT: s_mov_b32 s4, 0x5040100 +; GFX9-SDAG-NEXT: v_perm_b32 v0, v0, v1, s4 +; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-SDAG-LABEL: test_ldexp_v2f16_v2i32: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-SDAG-NEXT: s_movk_i32 s0, 0x8000 +; GFX11-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | 
instskip(SKIP_2) | instid1(VALU_DEP_3) +; GFX11-SDAG-NEXT: v_med3_i32 v0, v3, s0, 0x7fff +; GFX11-SDAG-NEXT: v_med3_i32 v1, v4, s0, 0x7fff +; GFX11-SDAG-NEXT: v_lshrrev_b32_e32 v3, 16, v2 +; GFX11-SDAG-NEXT: v_ldexp_f16_e32 v0, v2, v0 +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-SDAG-NEXT: v_ldexp_f16_e32 v1, v3, v1 +; GFX11-SDAG-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 +; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-GISEL-LABEL: test_ldexp_v2f16_v2i32: +; GFX8-GISEL: ; %bb.0: +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-GISEL-NEXT: v_mov_b32_e32 v0, 0xffff8000 +; GFX8-GISEL-NEXT: v_mov_b32_e32 v1, 0x7fff +; GFX8-GISEL-NEXT: v_med3_i32 v3, v3, v0, v1 +; GFX8-GISEL-NEXT: v_med3_i32 v0, v4, v0, v1 +; GFX8-GISEL-NEXT: v_ldexp_f16_e32 v3, v2, v3 +; GFX8-GISEL-NEXT: v_ldexp_f16_sdwa v0, v2, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v3, v0 +; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-GISEL-LABEL: test_ldexp_v2f16_v2i32: +; GFX9-GISEL: ; %bb.0: +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, 0xffff8000 +; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, 0x7fff +; GFX9-GISEL-NEXT: v_med3_i32 v3, v3, v0, v1 +; GFX9-GISEL-NEXT: v_med3_i32 v0, v4, v0, v1 +; GFX9-GISEL-NEXT: v_ldexp_f16_e32 v3, v2, v3 +; GFX9-GISEL-NEXT: v_ldexp_f16_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX9-GISEL-NEXT: v_lshl_or_b32 v0, v0, 16, v3 +; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-GISEL-LABEL: test_ldexp_v2f16_v2i32: +; GFX11-GISEL: ; %bb.0: +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-GISEL-NEXT: v_mov_b32_e32 v0, 0x7fff +; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3) +; GFX11-GISEL-NEXT: v_med3_i32 v1, 0xffff8000, v3, v0 +; GFX11-GISEL-NEXT: v_lshrrev_b32_e32 v3, 16, v2 +; GFX11-GISEL-NEXT: v_med3_i32 v0, 0xffff8000, v4, v0 +; GFX11-GISEL-NEXT: v_ldexp_f16_e32 v1, v2, v1 +; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-GISEL-NEXT: v_ldexp_f16_e32 v0, v3, v0 +; GFX11-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1 +; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-GISEL-NEXT: v_lshl_or_b32 v0, v0, 16, v1 +; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] + %result = call <2 x half> @llvm.experimental.constrained.ldexp.v2f16.v2i32(<2 x half> %a, <2 x i32> %b, metadata !"round.dynamic", metadata !"fpexcept.strict") + ret <2 x half> %result +} + +define <3 x half> @test_ldexp_v3f16_v3i32(ptr addrspace(1) %out, <3 x half> %a, <3 x i32> %b) #0 { +; GFX8-SDAG-LABEL: test_ldexp_v3f16_v3i32: +; GFX8-SDAG: ; %bb.0: +; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-SDAG-NEXT: s_movk_i32 s4, 0x8000 +; GFX8-SDAG-NEXT: v_mov_b32_e32 v1, 0x7fff +; GFX8-SDAG-NEXT: v_med3_i32 v0, v4, s4, v1 +; GFX8-SDAG-NEXT: v_med3_i32 v4, v5, s4, v1 +; GFX8-SDAG-NEXT: v_ldexp_f16_e32 v0, v2, v0 +; GFX8-SDAG-NEXT: v_ldexp_f16_sdwa v2, v2, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX8-SDAG-NEXT: v_med3_i32 v1, v6, s4, v1 +; GFX8-SDAG-NEXT: v_or_b32_e32 v0, v0, v2 +; GFX8-SDAG-NEXT: v_ldexp_f16_e32 v1, v3, v1 +; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-SDAG-LABEL: test_ldexp_v3f16_v3i32: +; GFX9-SDAG: ; %bb.0: +; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-SDAG-NEXT: s_movk_i32 
s4, 0x8000 +; GFX9-SDAG-NEXT: v_mov_b32_e32 v1, 0x7fff +; GFX9-SDAG-NEXT: v_med3_i32 v0, v4, s4, v1 +; GFX9-SDAG-NEXT: v_med3_i32 v4, v5, s4, v1 +; GFX9-SDAG-NEXT: v_ldexp_f16_e32 v0, v2, v0 +; GFX9-SDAG-NEXT: v_ldexp_f16_sdwa v2, v2, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX9-SDAG-NEXT: s_mov_b32 s5, 0x5040100 +; GFX9-SDAG-NEXT: v_med3_i32 v1, v6, s4, v1 +; GFX9-SDAG-NEXT: v_perm_b32 v0, v2, v0, s5 +; GFX9-SDAG-NEXT: v_ldexp_f16_e32 v1, v3, v1 +; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-SDAG-LABEL: test_ldexp_v3f16_v3i32: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-SDAG-NEXT: s_movk_i32 s0, 0x8000 +; GFX11-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(VALU_DEP_3) +; GFX11-SDAG-NEXT: v_med3_i32 v0, v4, s0, 0x7fff +; GFX11-SDAG-NEXT: v_med3_i32 v1, v5, s0, 0x7fff +; GFX11-SDAG-NEXT: v_lshrrev_b32_e32 v4, 16, v2 +; GFX11-SDAG-NEXT: v_ldexp_f16_e32 v0, v2, v0 +; GFX11-SDAG-NEXT: v_med3_i32 v2, v6, s0, 0x7fff +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-SDAG-NEXT: v_ldexp_f16_e32 v1, v4, v1 +; GFX11-SDAG-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3) +; GFX11-SDAG-NEXT: v_ldexp_f16_e32 v1, v3, v2 +; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-GISEL-LABEL: test_ldexp_v3f16_v3i32: +; GFX8-GISEL: ; %bb.0: +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-GISEL-NEXT: v_mov_b32_e32 v0, 0xffff8000 +; GFX8-GISEL-NEXT: v_mov_b32_e32 v1, 0x7fff +; GFX8-GISEL-NEXT: v_med3_i32 v4, v4, v0, v1 +; GFX8-GISEL-NEXT: v_med3_i32 v5, v5, v0, v1 +; GFX8-GISEL-NEXT: v_ldexp_f16_e32 v4, v2, v4 +; GFX8-GISEL-NEXT: v_ldexp_f16_sdwa v2, v2, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX8-GISEL-NEXT: v_med3_i32 v0, v6, v0, v1 +; GFX8-GISEL-NEXT: v_ldexp_f16_e32 v1, v3, v0 +; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v4, v2 +; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-GISEL-LABEL: test_ldexp_v3f16_v3i32: +; GFX9-GISEL: ; %bb.0: +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, 0xffff8000 +; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, 0x7fff +; GFX9-GISEL-NEXT: v_med3_i32 v4, v4, v0, v1 +; GFX9-GISEL-NEXT: v_med3_i32 v5, v5, v0, v1 +; GFX9-GISEL-NEXT: v_ldexp_f16_e32 v4, v2, v4 +; GFX9-GISEL-NEXT: v_ldexp_f16_sdwa v2, v2, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX9-GISEL-NEXT: v_med3_i32 v0, v6, v0, v1 +; GFX9-GISEL-NEXT: v_ldexp_f16_e32 v1, v3, v0 +; GFX9-GISEL-NEXT: v_lshl_or_b32 v0, v2, 16, v4 +; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-GISEL-LABEL: test_ldexp_v3f16_v3i32: +; GFX11-GISEL: ; %bb.0: +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-GISEL-NEXT: v_mov_b32_e32 v0, 0x7fff +; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3) +; GFX11-GISEL-NEXT: v_med3_i32 v1, 0xffff8000, v4, v0 +; GFX11-GISEL-NEXT: v_lshrrev_b32_e32 v4, 16, v2 +; GFX11-GISEL-NEXT: v_med3_i32 v5, 0xffff8000, v5, v0 +; GFX11-GISEL-NEXT: v_ldexp_f16_e32 v1, v2, v1 +; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_3) +; GFX11-GISEL-NEXT: v_ldexp_f16_e32 v2, v4, v5 +; GFX11-GISEL-NEXT: v_med3_i32 v4, 0xffff8000, v6, v0 +; GFX11-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1 +; GFX11-GISEL-NEXT: s_delay_alu 
instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3) +; GFX11-GISEL-NEXT: v_lshl_or_b32 v0, v2, 16, v1 +; GFX11-GISEL-NEXT: v_ldexp_f16_e32 v1, v3, v4 +; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] + %result = call <3 x half> @llvm.experimental.constrained.ldexp.v3f16.v3i32(<3 x half> %a, <3 x i32> %b, metadata !"round.dynamic", metadata !"fpexcept.strict") + ret <3 x half> %result +} + +define <4 x half> @test_ldexp_v4f16_v4i32(ptr addrspace(1) %out, <4 x half> %a, <4 x i32> %b) #0 { +; GFX8-SDAG-LABEL: test_ldexp_v4f16_v4i32: +; GFX8-SDAG: ; %bb.0: +; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-SDAG-NEXT: s_movk_i32 s4, 0x8000 +; GFX8-SDAG-NEXT: v_mov_b32_e32 v0, 0x7fff +; GFX8-SDAG-NEXT: v_med3_i32 v1, v7, s4, v0 +; GFX8-SDAG-NEXT: v_med3_i32 v6, v6, s4, v0 +; GFX8-SDAG-NEXT: v_med3_i32 v5, v5, s4, v0 +; GFX8-SDAG-NEXT: v_med3_i32 v0, v4, s4, v0 +; GFX8-SDAG-NEXT: v_ldexp_f16_sdwa v1, v3, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX8-SDAG-NEXT: v_ldexp_f16_e32 v3, v3, v6 +; GFX8-SDAG-NEXT: v_ldexp_f16_sdwa v5, v2, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX8-SDAG-NEXT: v_ldexp_f16_e32 v0, v2, v0 +; GFX8-SDAG-NEXT: v_or_b32_e32 v0, v0, v5 +; GFX8-SDAG-NEXT: v_or_b32_e32 v1, v3, v1 +; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-SDAG-LABEL: test_ldexp_v4f16_v4i32: +; GFX9-SDAG: ; %bb.0: +; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-SDAG-NEXT: s_movk_i32 s4, 0x8000 +; GFX9-SDAG-NEXT: v_mov_b32_e32 v0, 0x7fff +; GFX9-SDAG-NEXT: v_med3_i32 v1, v6, s4, v0 +; GFX9-SDAG-NEXT: v_med3_i32 v6, v7, s4, v0 +; GFX9-SDAG-NEXT: v_med3_i32 v4, v4, s4, v0 +; GFX9-SDAG-NEXT: v_med3_i32 v0, v5, s4, v0 +; GFX9-SDAG-NEXT: v_ldexp_f16_e32 v1, v3, v1 +; GFX9-SDAG-NEXT: v_ldexp_f16_sdwa v3, v3, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX9-SDAG-NEXT: v_ldexp_f16_e32 v4, v2, v4 +; GFX9-SDAG-NEXT: v_ldexp_f16_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX9-SDAG-NEXT: s_mov_b32 s4, 0x5040100 +; GFX9-SDAG-NEXT: v_perm_b32 v0, v0, v4, s4 +; GFX9-SDAG-NEXT: v_perm_b32 v1, v3, v1, s4 +; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-SDAG-LABEL: test_ldexp_v4f16_v4i32: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-SDAG-NEXT: s_movk_i32 s0, 0x8000 +; GFX11-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX11-SDAG-NEXT: v_med3_i32 v0, v6, s0, 0x7fff +; GFX11-SDAG-NEXT: v_med3_i32 v1, v7, s0, 0x7fff +; GFX11-SDAG-NEXT: v_med3_i32 v4, v4, s0, 0x7fff +; GFX11-SDAG-NEXT: v_med3_i32 v5, v5, s0, 0x7fff +; GFX11-SDAG-NEXT: v_lshrrev_b32_e32 v6, 16, v2 +; GFX11-SDAG-NEXT: v_lshrrev_b32_e32 v7, 16, v3 +; GFX11-SDAG-NEXT: v_ldexp_f16_e32 v3, v3, v0 +; GFX11-SDAG-NEXT: v_ldexp_f16_e32 v0, v2, v4 +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) +; GFX11-SDAG-NEXT: v_ldexp_f16_e32 v2, v6, v5 +; GFX11-SDAG-NEXT: v_ldexp_f16_e32 v1, v7, v1 +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-SDAG-NEXT: v_perm_b32 v0, v2, v0, 0x5040100 +; GFX11-SDAG-NEXT: v_perm_b32 v1, v1, v3, 0x5040100 +; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-GISEL-LABEL: test_ldexp_v4f16_v4i32: +; GFX8-GISEL: ; %bb.0: +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-GISEL-NEXT: v_mov_b32_e32 v0, 0xffff8000 +; GFX8-GISEL-NEXT: v_mov_b32_e32 v1, 0x7fff +; GFX8-GISEL-NEXT: 
v_med3_i32 v4, v4, v0, v1 +; GFX8-GISEL-NEXT: v_med3_i32 v5, v5, v0, v1 +; GFX8-GISEL-NEXT: v_ldexp_f16_e32 v4, v2, v4 +; GFX8-GISEL-NEXT: v_ldexp_f16_sdwa v2, v2, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX8-GISEL-NEXT: v_med3_i32 v5, v6, v0, v1 +; GFX8-GISEL-NEXT: v_med3_i32 v0, v7, v0, v1 +; GFX8-GISEL-NEXT: v_ldexp_f16_e32 v5, v3, v5 +; GFX8-GISEL-NEXT: v_ldexp_f16_sdwa v1, v3, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v4, v2 +; GFX8-GISEL-NEXT: v_or_b32_e32 v1, v5, v1 +; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-GISEL-LABEL: test_ldexp_v4f16_v4i32: +; GFX9-GISEL: ; %bb.0: +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, 0xffff8000 +; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, 0x7fff +; GFX9-GISEL-NEXT: v_med3_i32 v4, v4, v0, v1 +; GFX9-GISEL-NEXT: v_med3_i32 v5, v5, v0, v1 +; GFX9-GISEL-NEXT: v_ldexp_f16_e32 v4, v2, v4 +; GFX9-GISEL-NEXT: v_ldexp_f16_sdwa v2, v2, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX9-GISEL-NEXT: v_med3_i32 v5, v6, v0, v1 +; GFX9-GISEL-NEXT: v_med3_i32 v0, v7, v0, v1 +; GFX9-GISEL-NEXT: v_ldexp_f16_e32 v5, v3, v5 +; GFX9-GISEL-NEXT: v_ldexp_f16_sdwa v1, v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX9-GISEL-NEXT: v_lshl_or_b32 v0, v2, 16, v4 +; GFX9-GISEL-NEXT: v_lshl_or_b32 v1, v1, 16, v5 +; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-GISEL-LABEL: test_ldexp_v4f16_v4i32: +; GFX11-GISEL: ; %bb.0: +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-GISEL-NEXT: v_mov_b32_e32 v0, 0x7fff +; GFX11-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v2 +; GFX11-GISEL-NEXT: v_lshrrev_b32_e32 v8, 16, v3 +; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_3) | instid1(VALU_DEP_4) +; GFX11-GISEL-NEXT: v_med3_i32 v4, 0xffff8000, v4, v0 +; GFX11-GISEL-NEXT: v_med3_i32 v6, 0xffff8000, v6, v0 +; GFX11-GISEL-NEXT: v_med3_i32 v5, 0xffff8000, v5, v0 +; GFX11-GISEL-NEXT: v_med3_i32 v0, 0xffff8000, v7, v0 +; GFX11-GISEL-NEXT: v_ldexp_f16_e32 v2, v2, v4 +; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) +; GFX11-GISEL-NEXT: v_ldexp_f16_e32 v3, v3, v6 +; GFX11-GISEL-NEXT: v_ldexp_f16_e32 v1, v1, v5 +; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) +; GFX11-GISEL-NEXT: v_ldexp_f16_e32 v4, v8, v0 +; GFX11-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v2 +; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v3 +; GFX11-GISEL-NEXT: v_lshl_or_b32 v0, v1, 16, v0 +; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) +; GFX11-GISEL-NEXT: v_lshl_or_b32 v1, v4, 16, v2 +; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] + %result = call <4 x half> @llvm.experimental.constrained.ldexp.v4f16.v4i32(<4 x half> %a, <4 x i32> %b, metadata !"round.dynamic", metadata !"fpexcept.strict") + ret <4 x half> %result +} + +declare half @llvm.experimental.constrained.ldexp.f16.i16(half, i16, metadata, metadata) #1 +declare half @llvm.experimental.constrained.ldexp.f16.i32(half, i32, metadata, metadata) #1 +declare <2 x half> @llvm.experimental.constrained.ldexp.v2f16.v2i16(<2 x half>, <2 x i16>, metadata, metadata) #1 +declare <2 x half> @llvm.experimental.constrained.ldexp.v2f16.v2i32(<2 x half>, <2 x i32>, metadata, metadata) #1 +declare <3 x half> 
@llvm.experimental.constrained.ldexp.v3f16.v3i32(<3 x half>, <3 x i32>, metadata, metadata) #1 +declare <4 x half> @llvm.experimental.constrained.ldexp.v4f16.v4i32(<4 x half>, <4 x i32>, metadata, metadata) #1 + +attributes #0 = { strictfp } +attributes #1 = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) } +;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +; GCN: {{.*}} +; GFX11: {{.*}} +; GFX8: {{.*}} +; GFX9: {{.*}} Index: llvm/test/CodeGen/AMDGPU/strict_ldexp.f32.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/AMDGPU/strict_ldexp.f32.ll @@ -0,0 +1,255 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 +; RUN: llc -global-isel=0 -march=amdgcn -mcpu=tahiti < %s | FileCheck -check-prefixes=GCN,GFX6,GFX6-SDAG %s +; RUN: llc -global-isel=0 -march=amdgcn -mcpu=tonga < %s | FileCheck -check-prefixes=GCN,GFX8,GFX8-SDAG %s +; RUN: llc -global-isel=0 -march=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9,GFX9-SDAG %s +; RUN: llc -global-isel=0 -march=amdgcn -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GCN,GFX11,GFX11-SDAG %s + +; RUN: llc -global-isel=1 -march=amdgcn -mcpu=tahiti < %s | FileCheck -check-prefixes=GCN,GFX6,GFX6-GISEL %s +; RUN: llc -global-isel=1 -march=amdgcn -mcpu=tonga < %s | FileCheck -check-prefixes=GCN,GFX8,GFX8-GISEL %s +; RUN: llc -global-isel=1 -march=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9,GFX9-GISEL %s +; RUN: llc -global-isel=1 -march=amdgcn -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GCN,GFX11,GFX11-GISEL %s + +; define float @test_ldexp_f32_i16(ptr addrspace(1) %out, float %a, i16 %b) #0 { +; %result = call float @llvm.experimental.constrained.ldexp.f32.i16(float %a, i16 %b, metadata !"round.dynamic", metadata !"fpexcept.strict") +; ret float %result +; } + +define float @test_ldexp_f32_i32(ptr addrspace(1) %out, float %a, i32 %b) #0 { +; GFX6-LABEL: test_ldexp_f32_i32: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-NEXT: v_ldexp_f32_e32 v0, v2, v3 +; GFX6-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: test_ldexp_f32_i32: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_ldexp_f32 v0, v2, v3 +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: test_ldexp_f32_i32: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_ldexp_f32 v0, v2, v3 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: test_ldexp_f32_i32: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: v_ldexp_f32 v0, v2, v3 +; GFX11-NEXT: s_setpc_b64 s[30:31] + %result = call float @llvm.experimental.constrained.ldexp.f32.i32(float %a, i32 %b, metadata !"round.dynamic", metadata !"fpexcept.strict") + ret float %result +} + +; define <2 x float> @test_ldexp_v2f32_v2i16(ptr addrspace(1) %out, <2 x float> %a, <2 x i16> %b) #0 { +; %result = call <2 x float> @llvm.experimental.constrained.ldexp.v2f32.v2i16(<2 x float> %a, <2 x i16> %b, metadata !"round.dynamic", metadata !"fpexcept.strict") +; ret <2 x float> %result +; } + +define <2 x float> @test_ldexp_v2f32_v2i32(ptr addrspace(1) %out, <2 x float> %a, <2 x i32> %b) #0 { +; GFX6-SDAG-LABEL: test_ldexp_v2f32_v2i32: +; GFX6-SDAG: ; %bb.0: +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-SDAG-NEXT: v_ldexp_f32_e32 v1, v3, v5 +; 
GFX6-SDAG-NEXT: v_ldexp_f32_e32 v0, v2, v4 +; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-SDAG-LABEL: test_ldexp_v2f32_v2i32: +; GFX8-SDAG: ; %bb.0: +; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-SDAG-NEXT: v_ldexp_f32 v1, v3, v5 +; GFX8-SDAG-NEXT: v_ldexp_f32 v0, v2, v4 +; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-SDAG-LABEL: test_ldexp_v2f32_v2i32: +; GFX9-SDAG: ; %bb.0: +; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-SDAG-NEXT: v_ldexp_f32 v1, v3, v5 +; GFX9-SDAG-NEXT: v_ldexp_f32 v0, v2, v4 +; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: test_ldexp_v2f32_v2i32: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: v_ldexp_f32 v0, v2, v4 +; GFX11-NEXT: v_ldexp_f32 v1, v3, v5 +; GFX11-NEXT: s_setpc_b64 s[30:31] +; +; GFX6-GISEL-LABEL: test_ldexp_v2f32_v2i32: +; GFX6-GISEL: ; %bb.0: +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-GISEL-NEXT: v_ldexp_f32_e32 v0, v2, v4 +; GFX6-GISEL-NEXT: v_ldexp_f32_e32 v1, v3, v5 +; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-GISEL-LABEL: test_ldexp_v2f32_v2i32: +; GFX8-GISEL: ; %bb.0: +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-GISEL-NEXT: v_ldexp_f32 v0, v2, v4 +; GFX8-GISEL-NEXT: v_ldexp_f32 v1, v3, v5 +; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-GISEL-LABEL: test_ldexp_v2f32_v2i32: +; GFX9-GISEL: ; %bb.0: +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-GISEL-NEXT: v_ldexp_f32 v0, v2, v4 +; GFX9-GISEL-NEXT: v_ldexp_f32 v1, v3, v5 +; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] + %result = call <2 x float> @llvm.experimental.constrained.ldexp.v2f32.v2i32(<2 x float> %a, <2 x i32> %b, metadata !"round.dynamic", metadata !"fpexcept.strict") + ret <2 x float> %result +} + +define <3 x float> @test_ldexp_v3f32_v3i32(ptr addrspace(1) %out, <3 x float> %a, <3 x i32> %b) #0 { +; GFX6-SDAG-LABEL: test_ldexp_v3f32_v3i32: +; GFX6-SDAG: ; %bb.0: +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-SDAG-NEXT: v_ldexp_f32_e32 v4, v4, v7 +; GFX6-SDAG-NEXT: v_ldexp_f32_e32 v1, v3, v6 +; GFX6-SDAG-NEXT: v_ldexp_f32_e32 v0, v2, v5 +; GFX6-SDAG-NEXT: v_mov_b32_e32 v2, v4 +; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-SDAG-LABEL: test_ldexp_v3f32_v3i32: +; GFX8-SDAG: ; %bb.0: +; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-SDAG-NEXT: v_ldexp_f32 v4, v4, v7 +; GFX8-SDAG-NEXT: v_ldexp_f32 v1, v3, v6 +; GFX8-SDAG-NEXT: v_ldexp_f32 v0, v2, v5 +; GFX8-SDAG-NEXT: v_mov_b32_e32 v2, v4 +; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-SDAG-LABEL: test_ldexp_v3f32_v3i32: +; GFX9-SDAG: ; %bb.0: +; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-SDAG-NEXT: v_ldexp_f32 v4, v4, v7 +; GFX9-SDAG-NEXT: v_ldexp_f32 v1, v3, v6 +; GFX9-SDAG-NEXT: v_ldexp_f32 v0, v2, v5 +; GFX9-SDAG-NEXT: v_mov_b32_e32 v2, v4 +; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: test_ldexp_v3f32_v3i32: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: v_ldexp_f32 v0, v2, v5 +; GFX11-NEXT: v_ldexp_f32 v1, v3, v6 +; GFX11-NEXT: v_ldexp_f32 v2, v4, v7 +; GFX11-NEXT: s_setpc_b64 s[30:31] +; +; GFX6-GISEL-LABEL: test_ldexp_v3f32_v3i32: +; GFX6-GISEL: ; %bb.0: +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-GISEL-NEXT: v_ldexp_f32_e32 v0, v2, v5 +; GFX6-GISEL-NEXT: v_ldexp_f32_e32 v1, v3, v6 +; GFX6-GISEL-NEXT: v_ldexp_f32_e32 v2, v4, v7 +; 
GFX6-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-GISEL-LABEL: test_ldexp_v3f32_v3i32: +; GFX8-GISEL: ; %bb.0: +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-GISEL-NEXT: v_ldexp_f32 v0, v2, v5 +; GFX8-GISEL-NEXT: v_ldexp_f32 v1, v3, v6 +; GFX8-GISEL-NEXT: v_ldexp_f32 v2, v4, v7 +; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-GISEL-LABEL: test_ldexp_v3f32_v3i32: +; GFX9-GISEL: ; %bb.0: +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-GISEL-NEXT: v_ldexp_f32 v0, v2, v5 +; GFX9-GISEL-NEXT: v_ldexp_f32 v1, v3, v6 +; GFX9-GISEL-NEXT: v_ldexp_f32 v2, v4, v7 +; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] + %result = call <3 x float> @llvm.experimental.constrained.ldexp.v3f32.v3i32(<3 x float> %a, <3 x i32> %b, metadata !"round.dynamic", metadata !"fpexcept.strict") + ret <3 x float> %result +} + +define <4 x float> @test_ldexp_v4f32_v4i32(ptr addrspace(1) %out, <4 x float> %a, <4 x i32> %b) #0 { +; GFX6-SDAG-LABEL: test_ldexp_v4f32_v4i32: +; GFX6-SDAG: ; %bb.0: +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-SDAG-NEXT: v_ldexp_f32_e32 v5, v5, v9 +; GFX6-SDAG-NEXT: v_ldexp_f32_e32 v4, v4, v8 +; GFX6-SDAG-NEXT: v_ldexp_f32_e32 v1, v3, v7 +; GFX6-SDAG-NEXT: v_ldexp_f32_e32 v0, v2, v6 +; GFX6-SDAG-NEXT: v_mov_b32_e32 v2, v4 +; GFX6-SDAG-NEXT: v_mov_b32_e32 v3, v5 +; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-SDAG-LABEL: test_ldexp_v4f32_v4i32: +; GFX8-SDAG: ; %bb.0: +; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-SDAG-NEXT: v_ldexp_f32 v5, v5, v9 +; GFX8-SDAG-NEXT: v_ldexp_f32 v4, v4, v8 +; GFX8-SDAG-NEXT: v_ldexp_f32 v1, v3, v7 +; GFX8-SDAG-NEXT: v_ldexp_f32 v0, v2, v6 +; GFX8-SDAG-NEXT: v_mov_b32_e32 v2, v4 +; GFX8-SDAG-NEXT: v_mov_b32_e32 v3, v5 +; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-SDAG-LABEL: test_ldexp_v4f32_v4i32: +; GFX9-SDAG: ; %bb.0: +; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-SDAG-NEXT: v_ldexp_f32 v5, v5, v9 +; GFX9-SDAG-NEXT: v_ldexp_f32 v4, v4, v8 +; GFX9-SDAG-NEXT: v_ldexp_f32 v1, v3, v7 +; GFX9-SDAG-NEXT: v_ldexp_f32 v0, v2, v6 +; GFX9-SDAG-NEXT: v_mov_b32_e32 v2, v4 +; GFX9-SDAG-NEXT: v_mov_b32_e32 v3, v5 +; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: test_ldexp_v4f32_v4i32: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: v_ldexp_f32 v0, v2, v6 +; GFX11-NEXT: v_ldexp_f32 v1, v3, v7 +; GFX11-NEXT: v_ldexp_f32 v2, v4, v8 +; GFX11-NEXT: v_ldexp_f32 v3, v5, v9 +; GFX11-NEXT: s_setpc_b64 s[30:31] +; +; GFX6-GISEL-LABEL: test_ldexp_v4f32_v4i32: +; GFX6-GISEL: ; %bb.0: +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-GISEL-NEXT: v_ldexp_f32_e32 v0, v2, v6 +; GFX6-GISEL-NEXT: v_ldexp_f32_e32 v1, v3, v7 +; GFX6-GISEL-NEXT: v_ldexp_f32_e32 v2, v4, v8 +; GFX6-GISEL-NEXT: v_ldexp_f32_e32 v3, v5, v9 +; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-GISEL-LABEL: test_ldexp_v4f32_v4i32: +; GFX8-GISEL: ; %bb.0: +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-GISEL-NEXT: v_ldexp_f32 v0, v2, v6 +; GFX8-GISEL-NEXT: v_ldexp_f32 v1, v3, v7 +; GFX8-GISEL-NEXT: v_ldexp_f32 v2, v4, v8 +; GFX8-GISEL-NEXT: v_ldexp_f32 v3, v5, v9 +; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-GISEL-LABEL: test_ldexp_v4f32_v4i32: +; GFX9-GISEL: ; %bb.0: +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-GISEL-NEXT: v_ldexp_f32 v0, v2, v6 +; GFX9-GISEL-NEXT: v_ldexp_f32 v1, v3, v7 +; GFX9-GISEL-NEXT: v_ldexp_f32 v2, v4, v8 +; GFX9-GISEL-NEXT: v_ldexp_f32 v3, 
v5, v9 +; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] + %result = call <4 x float> @llvm.experimental.constrained.ldexp.v4f32.v4i32(<4 x float> %a, <4 x i32> %b, metadata !"round.dynamic", metadata !"fpexcept.strict") + ret <4 x float> %result +} + +declare float @llvm.experimental.constrained.ldexp.f32.i16(float, i16, metadata, metadata) #1 +declare float @llvm.experimental.constrained.ldexp.f32.i32(float, i32, metadata, metadata) #1 +declare <2 x float> @llvm.experimental.constrained.ldexp.v2f32.v2i16(<2 x float>, <2 x i16>, metadata, metadata) #1 +declare <2 x float> @llvm.experimental.constrained.ldexp.v2f32.v2i32(<2 x float>, <2 x i32>, metadata, metadata) #1 +declare <3 x float> @llvm.experimental.constrained.ldexp.v3f32.v3i32(<3 x float>, <3 x i32>, metadata, metadata) #1 +declare <4 x float> @llvm.experimental.constrained.ldexp.v4f32.v4i32(<4 x float>, <4 x i32>, metadata, metadata) #1 + +attributes #0 = { strictfp } +attributes #1 = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) } +;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +; GCN: {{.*}} +; GFX11-GISEL: {{.*}} +; GFX11-SDAG: {{.*}} Index: llvm/test/CodeGen/AMDGPU/strict_ldexp.f64.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/AMDGPU/strict_ldexp.f64.ll @@ -0,0 +1,180 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 +; RUN: llc -global-isel=0 -march=amdgcn -mcpu=tahiti < %s | FileCheck -check-prefixes=GCN,GFX6,GFX6-SDAG %s +; RUN: llc -global-isel=0 -march=amdgcn -mcpu=tonga < %s | FileCheck -check-prefixes=GCN,GFX8,GFX8-SDAG %s +; RUN: llc -global-isel=0 -march=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9,GFX9-SDAG %s +; RUN: llc -global-isel=0 -march=amdgcn -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GCN,GFX11,GFX11-SDAG %s + +; RUN: llc -global-isel=1 -march=amdgcn -mcpu=tahiti < %s | FileCheck -check-prefixes=GCN,GFX6,GFX6-GISEL %s +; RUN: llc -global-isel=1 -march=amdgcn -mcpu=tonga < %s | FileCheck -check-prefixes=GCN,GFX8,GFX8-GISEL %s +; RUN: llc -global-isel=1 -march=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9,GFX9-GISEL %s +; RUN: llc -global-isel=1 -march=amdgcn -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GCN,GFX11,GFX11-GISEL %s + +; define double @test_ldexp_f64_i16(ptr addrspace(1) %out, double %a, i16 %b) #0 { +; %result = call double @llvm.experimental.constrained.ldexp.f64.i16(double %a, i16 %b, metadata !"round.dynamic", metadata !"fpexcept.strict") +; ret double %result +; } + +define double @test_ldexp_f64_i32(ptr addrspace(1) %out, double %a, i32 %b) #0 { +; GFX6-LABEL: test_ldexp_f64_i32: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-NEXT: v_ldexp_f64 v[0:1], v[2:3], v4 +; GFX6-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: test_ldexp_f64_i32: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_ldexp_f64 v[0:1], v[2:3], v4 +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: test_ldexp_f64_i32: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_ldexp_f64 v[0:1], v[2:3], v4 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: test_ldexp_f64_i32: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: v_ldexp_f64 v[0:1], v[2:3], v4 +; GFX11-NEXT: s_setpc_b64 s[30:31] + %result = call double 
@llvm.experimental.constrained.ldexp.f64.i32(double %a, i32 %b, metadata !"round.dynamic", metadata !"fpexcept.strict") + ret double %result +} + +; define <2 x double> @test_ldexp_v2f64_v2i16(ptr addrspace(1) %out, <2 x double> %a, <2 x i16> %b) #0 { +; %result = call <2 x double> @llvm.experimental.constrained.ldexp.v2f64.v2i16(<2 x double> %a, <2 x i16> %b, metadata !"round.dynamic", metadata !"fpexcept.strict") +; ret <2 x double> %result +; } + +define <2 x double> @test_ldexp_v2f64_v2i32(ptr addrspace(1) %out, <2 x double> %a, <2 x i32> %b) #0 { +; GFX6-LABEL: test_ldexp_v2f64_v2i32: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-NEXT: v_ldexp_f64 v[0:1], v[2:3], v6 +; GFX6-NEXT: v_ldexp_f64 v[2:3], v[4:5], v7 +; GFX6-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: test_ldexp_v2f64_v2i32: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_ldexp_f64 v[0:1], v[2:3], v6 +; GFX8-NEXT: v_ldexp_f64 v[2:3], v[4:5], v7 +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: test_ldexp_v2f64_v2i32: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_ldexp_f64 v[0:1], v[2:3], v6 +; GFX9-NEXT: v_ldexp_f64 v[2:3], v[4:5], v7 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: test_ldexp_v2f64_v2i32: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: v_ldexp_f64 v[0:1], v[2:3], v6 +; GFX11-NEXT: v_ldexp_f64 v[2:3], v[4:5], v7 +; GFX11-NEXT: s_setpc_b64 s[30:31] + %result = call <2 x double> @llvm.experimental.constrained.ldexp.v2f64.v2i32(<2 x double> %a, <2 x i32> %b, metadata !"round.dynamic", metadata !"fpexcept.strict") + ret <2 x double> %result +} + +define <3 x double> @test_ldexp_v3f64_v3i32(ptr addrspace(1) %out, <3 x double> %a, <3 x i32> %b) #0 { +; GFX6-LABEL: test_ldexp_v3f64_v3i32: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-NEXT: v_ldexp_f64 v[0:1], v[2:3], v8 +; GFX6-NEXT: v_ldexp_f64 v[2:3], v[4:5], v9 +; GFX6-NEXT: v_ldexp_f64 v[4:5], v[6:7], v10 +; GFX6-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: test_ldexp_v3f64_v3i32: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_ldexp_f64 v[0:1], v[2:3], v8 +; GFX8-NEXT: v_ldexp_f64 v[2:3], v[4:5], v9 +; GFX8-NEXT: v_ldexp_f64 v[4:5], v[6:7], v10 +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: test_ldexp_v3f64_v3i32: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_ldexp_f64 v[0:1], v[2:3], v8 +; GFX9-NEXT: v_ldexp_f64 v[2:3], v[4:5], v9 +; GFX9-NEXT: v_ldexp_f64 v[4:5], v[6:7], v10 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: test_ldexp_v3f64_v3i32: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: v_ldexp_f64 v[0:1], v[2:3], v8 +; GFX11-NEXT: v_ldexp_f64 v[2:3], v[4:5], v9 +; GFX11-NEXT: v_ldexp_f64 v[4:5], v[6:7], v10 +; GFX11-NEXT: s_setpc_b64 s[30:31] + %result = call <3 x double> @llvm.experimental.constrained.ldexp.v3f64.v3i32(<3 x double> %a, <3 x i32> %b, metadata !"round.dynamic", metadata !"fpexcept.strict") + ret <3 x double> %result +} + +define <4 x double> @test_ldexp_v4f64_v4i32(ptr addrspace(1) %out, <4 x double> %a, <4 x i32> %b) #0 { +; GFX6-LABEL: test_ldexp_v4f64_v4i32: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-NEXT: v_ldexp_f64 v[0:1], v[2:3], v10 +; GFX6-NEXT: v_ldexp_f64 v[2:3], v[4:5], v11 +; 
GFX6-NEXT: v_ldexp_f64 v[4:5], v[6:7], v12 +; GFX6-NEXT: v_ldexp_f64 v[6:7], v[8:9], v13 +; GFX6-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: test_ldexp_v4f64_v4i32: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_ldexp_f64 v[0:1], v[2:3], v10 +; GFX8-NEXT: v_ldexp_f64 v[2:3], v[4:5], v11 +; GFX8-NEXT: v_ldexp_f64 v[4:5], v[6:7], v12 +; GFX8-NEXT: v_ldexp_f64 v[6:7], v[8:9], v13 +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: test_ldexp_v4f64_v4i32: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_ldexp_f64 v[0:1], v[2:3], v10 +; GFX9-NEXT: v_ldexp_f64 v[2:3], v[4:5], v11 +; GFX9-NEXT: v_ldexp_f64 v[4:5], v[6:7], v12 +; GFX9-NEXT: v_ldexp_f64 v[6:7], v[8:9], v13 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: test_ldexp_v4f64_v4i32: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: v_ldexp_f64 v[0:1], v[2:3], v10 +; GFX11-NEXT: v_ldexp_f64 v[2:3], v[4:5], v11 +; GFX11-NEXT: v_ldexp_f64 v[4:5], v[6:7], v12 +; GFX11-NEXT: v_ldexp_f64 v[6:7], v[8:9], v13 +; GFX11-NEXT: s_setpc_b64 s[30:31] + %result = call <4 x double> @llvm.experimental.constrained.ldexp.v4f64.v4i32(<4 x double> %a, <4 x i32> %b, metadata !"round.dynamic", metadata !"fpexcept.strict") + ret <4 x double> %result +} + +declare double @llvm.experimental.constrained.ldexp.f64.i16(double, i16, metadata, metadata) #1 +declare double @llvm.experimental.constrained.ldexp.f64.i32(double, i32, metadata, metadata) #1 +declare <2 x double> @llvm.experimental.constrained.ldexp.v2f64.v2i16(<2 x double>, <2 x i16>, metadata, metadata) #1 +declare <2 x double> @llvm.experimental.constrained.ldexp.v2f64.v2i32(<2 x double>, <2 x i32>, metadata, metadata) #1 +declare <3 x double> @llvm.experimental.constrained.ldexp.v3f64.v3i32(<3 x double>, <3 x i32>, metadata, metadata) #1 +declare <4 x double> @llvm.experimental.constrained.ldexp.v4f64.v4i32(<4 x double>, <4 x i32>, metadata, metadata) #1 + +attributes #0 = { strictfp } +attributes #1 = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) } +;; NOTE: These prefixes are unused and the list is autogenerated. 
Do not add tests below this line: +; GCN: {{.*}} +; GFX11-GISEL: {{.*}} +; GFX11-SDAG: {{.*}} +; GFX6-GISEL: {{.*}} +; GFX6-SDAG: {{.*}} +; GFX8-GISEL: {{.*}} +; GFX8-SDAG: {{.*}} +; GFX9-GISEL: {{.*}} +; GFX9-SDAG: {{.*}} Index: llvm/test/CodeGen/Mips/ldexp.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/Mips/ldexp.ll @@ -0,0 +1,172 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 +; RUN: llc -mtriple=mips-- -mattr=+soft-float < %s | FileCheck -check-prefix=SOFT %s + +define float @ldexp_f32(i8 zeroext %x) { +; SOFT-LABEL: ldexp_f32: +; SOFT: # %bb.0: +; SOFT-NEXT: addiu $sp, $sp, -24 +; SOFT-NEXT: .cfi_def_cfa_offset 24 +; SOFT-NEXT: sw $ra, 20($sp) # 4-byte Folded Spill +; SOFT-NEXT: .cfi_offset 31, -4 +; SOFT-NEXT: move $5, $4 +; SOFT-NEXT: jal ldexpf +; SOFT-NEXT: lui $4, 16256 +; SOFT-NEXT: lw $ra, 20($sp) # 4-byte Folded Reload +; SOFT-NEXT: jr $ra +; SOFT-NEXT: addiu $sp, $sp, 24 + %zext = zext i8 %x to i32 + %ldexp = call float @llvm.ldexp.f32.i32(float 1.000000e+00, i32 %zext) + ret float %ldexp +} + +define double @ldexp_f64(i8 zeroext %x) { +; SOFT-LABEL: ldexp_f64: +; SOFT: # %bb.0: +; SOFT-NEXT: addiu $sp, $sp, -24 +; SOFT-NEXT: .cfi_def_cfa_offset 24 +; SOFT-NEXT: sw $ra, 20($sp) # 4-byte Folded Spill +; SOFT-NEXT: .cfi_offset 31, -4 +; SOFT-NEXT: move $6, $4 +; SOFT-NEXT: lui $4, 16368 +; SOFT-NEXT: jal ldexp +; SOFT-NEXT: addiu $5, $zero, 0 +; SOFT-NEXT: lw $ra, 20($sp) # 4-byte Folded Reload +; SOFT-NEXT: jr $ra +; SOFT-NEXT: addiu $sp, $sp, 24 + %zext = zext i8 %x to i32 + %ldexp = call double @llvm.ldexp.f64.i32(double 1.000000e+00, i32 %zext) + ret double %ldexp +} + +define <2 x float> @ldexp_v2f32(<2 x float> %val, <2 x i32> %exp) { +; SOFT-LABEL: ldexp_v2f32: +; SOFT: # %bb.0: +; SOFT-NEXT: addiu $sp, $sp, -32 +; SOFT-NEXT: .cfi_def_cfa_offset 32 +; SOFT-NEXT: sw $ra, 28($sp) # 4-byte Folded Spill +; SOFT-NEXT: sw $17, 24($sp) # 4-byte Folded Spill +; SOFT-NEXT: sw $16, 20($sp) # 4-byte Folded Spill +; SOFT-NEXT: .cfi_offset 31, -4 +; SOFT-NEXT: .cfi_offset 17, -8 +; SOFT-NEXT: .cfi_offset 16, -12 +; SOFT-NEXT: move $16, $6 +; SOFT-NEXT: move $17, $4 +; SOFT-NEXT: lw $5, 52($sp) +; SOFT-NEXT: jal ldexpf +; SOFT-NEXT: move $4, $7 +; SOFT-NEXT: lw $5, 48($sp) +; SOFT-NEXT: sw $2, 4($17) +; SOFT-NEXT: jal ldexpf +; SOFT-NEXT: move $4, $16 +; SOFT-NEXT: sw $2, 0($17) +; SOFT-NEXT: lw $16, 20($sp) # 4-byte Folded Reload +; SOFT-NEXT: lw $17, 24($sp) # 4-byte Folded Reload +; SOFT-NEXT: lw $ra, 28($sp) # 4-byte Folded Reload +; SOFT-NEXT: jr $ra +; SOFT-NEXT: addiu $sp, $sp, 32 + %1 = call <2 x float> @llvm.ldexp.v2f32.v2i32(<2 x float> %val, <2 x i32> %exp) + ret <2 x float> %1 +} + +define <4 x float> @ldexp_v4f32(<4 x float> %val, <4 x i32> %exp) { +; SOFT-LABEL: ldexp_v4f32: +; SOFT: # %bb.0: +; SOFT-NEXT: addiu $sp, $sp, -40 +; SOFT-NEXT: .cfi_def_cfa_offset 40 +; SOFT-NEXT: sw $ra, 36($sp) # 4-byte Folded Spill +; SOFT-NEXT: sw $20, 32($sp) # 4-byte Folded Spill +; SOFT-NEXT: sw $19, 28($sp) # 4-byte Folded Spill +; SOFT-NEXT: sw $18, 24($sp) # 4-byte Folded Spill +; SOFT-NEXT: sw $17, 20($sp) # 4-byte Folded Spill +; SOFT-NEXT: sw $16, 16($sp) # 4-byte Folded Spill +; SOFT-NEXT: .cfi_offset 31, -4 +; SOFT-NEXT: .cfi_offset 20, -8 +; SOFT-NEXT: .cfi_offset 19, -12 +; SOFT-NEXT: .cfi_offset 18, -16 +; SOFT-NEXT: .cfi_offset 17, -20 +; SOFT-NEXT: .cfi_offset 16, -24 +; SOFT-NEXT: move $16, $7 +; SOFT-NEXT: move $18, $4 +; SOFT-NEXT: lw $4, 60($sp) +; 
SOFT-NEXT: lw $5, 76($sp) +; SOFT-NEXT: jal ldexpf +; SOFT-NEXT: move $17, $6 +; SOFT-NEXT: lw $19, 64($sp) +; SOFT-NEXT: lw $20, 68($sp) +; SOFT-NEXT: lw $5, 72($sp) +; SOFT-NEXT: lw $4, 56($sp) +; SOFT-NEXT: jal ldexpf +; SOFT-NEXT: sw $2, 12($18) +; SOFT-NEXT: sw $2, 8($18) +; SOFT-NEXT: move $4, $16 +; SOFT-NEXT: jal ldexpf +; SOFT-NEXT: move $5, $20 +; SOFT-NEXT: sw $2, 4($18) +; SOFT-NEXT: move $4, $17 +; SOFT-NEXT: jal ldexpf +; SOFT-NEXT: move $5, $19 +; SOFT-NEXT: sw $2, 0($18) +; SOFT-NEXT: lw $16, 16($sp) # 4-byte Folded Reload +; SOFT-NEXT: lw $17, 20($sp) # 4-byte Folded Reload +; SOFT-NEXT: lw $18, 24($sp) # 4-byte Folded Reload +; SOFT-NEXT: lw $19, 28($sp) # 4-byte Folded Reload +; SOFT-NEXT: lw $20, 32($sp) # 4-byte Folded Reload +; SOFT-NEXT: lw $ra, 36($sp) # 4-byte Folded Reload +; SOFT-NEXT: jr $ra +; SOFT-NEXT: addiu $sp, $sp, 40 + %1 = call <4 x float> @llvm.ldexp.v4f32.v4i32(<4 x float> %val, <4 x i32> %exp) + ret <4 x float> %1 +} + +define half @ldexp_f16(half %arg0, i32 %arg1) { +; SOFT-LABEL: ldexp_f16: +; SOFT: # %bb.0: +; SOFT-NEXT: addiu $sp, $sp, -24 +; SOFT-NEXT: .cfi_def_cfa_offset 24 +; SOFT-NEXT: sw $ra, 20($sp) # 4-byte Folded Spill +; SOFT-NEXT: sw $16, 16($sp) # 4-byte Folded Spill +; SOFT-NEXT: .cfi_offset 31, -4 +; SOFT-NEXT: .cfi_offset 16, -8 +; SOFT-NEXT: move $16, $5 +; SOFT-NEXT: jal __gnu_h2f_ieee +; SOFT-NEXT: andi $4, $4, 65535 +; SOFT-NEXT: move $4, $2 +; SOFT-NEXT: jal ldexpf +; SOFT-NEXT: move $5, $16 +; SOFT-NEXT: jal __gnu_f2h_ieee +; SOFT-NEXT: move $4, $2 +; SOFT-NEXT: lw $16, 16($sp) # 4-byte Folded Reload +; SOFT-NEXT: lw $ra, 20($sp) # 4-byte Folded Reload +; SOFT-NEXT: jr $ra +; SOFT-NEXT: addiu $sp, $sp, 24 + %ldexp = call half @llvm.ldexp.f16.i32(half %arg0, i32 %arg1) + ret half %ldexp +} + +define x86_fp80 @ldexp_f80(x86_fp80 %arg0, i32 %arg1) { +; SOFT-LABEL: ldexp_f80: +; SOFT: # %bb.0: +; SOFT-NEXT: addiu $sp, $sp, -24 +; SOFT-NEXT: .cfi_def_cfa_offset 24 +; SOFT-NEXT: sw $ra, 20($sp) # 4-byte Folded Spill +; SOFT-NEXT: .cfi_offset 31, -4 +; SOFT-NEXT: jal ldexpl +; SOFT-NEXT: andi $4, $4, 65535 +; SOFT-NEXT: move $4, $2 +; SOFT-NEXT: addiu $2, $zero, 0 +; SOFT-NEXT: lw $ra, 20($sp) # 4-byte Folded Reload +; SOFT-NEXT: jr $ra +; SOFT-NEXT: addiu $sp, $sp, 24 + %ldexp = call x86_fp80 @llvm.ldexp.f80.i32(x86_fp80 %arg0, i32 %arg1) + ret x86_fp80 %ldexp +} + + +declare double @llvm.ldexp.f64.i32(double, i32) #0 +declare float @llvm.ldexp.f32.i32(float, i32) #0 +declare <2 x float> @llvm.ldexp.v2f32.v2i32(<2 x float>, <2 x i32>) #0 +declare <4 x float> @llvm.ldexp.v4f32.v4i32(<4 x float>, <4 x i32>) #0 +declare x86_fp80 @llvm.ldexp.f80.i32(x86_fp80, i32) +declare half @llvm.ldexp.f16.i32(half, i32) #0 + +attributes #0 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } Index: llvm/test/CodeGen/PowerPC/ldexp-libcall.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/PowerPC/ldexp-libcall.ll @@ -0,0 +1,66 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 +; RUN: llc -mcpu=pwr9 -mtriple=powerpc64le-unknown-unknown \ +; RUN: -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names < %s | FileCheck %s + +define float @call_ldexpf(float %a, i32 %b) { +; CHECK-LABEL: call_ldexpf: +; CHECK: # %bb.0: +; CHECK-NEXT: mflr r0 +; CHECK-NEXT: stdu r1, -32(r1) +; CHECK-NEXT: std r0, 48(r1) +; CHECK-NEXT: .cfi_def_cfa_offset 32 +; CHECK-NEXT: .cfi_offset lr, 16 +; CHECK-NEXT: clrldi r4, r4, 32 +; CHECK-NEXT: bl ldexpf +; 
CHECK-NEXT: nop +; CHECK-NEXT: addi r1, r1, 32 +; CHECK-NEXT: ld r0, 16(r1) +; CHECK-NEXT: mtlr r0 +; CHECK-NEXT: blr + %result = call float @ldexpf(float %a, i32 %b) + ret float %result +} + +define double @call_ldexp(double %a, i32 %b) { +; CHECK-LABEL: call_ldexp: +; CHECK: # %bb.0: +; CHECK-NEXT: mflr r0 +; CHECK-NEXT: stdu r1, -32(r1) +; CHECK-NEXT: std r0, 48(r1) +; CHECK-NEXT: .cfi_def_cfa_offset 32 +; CHECK-NEXT: .cfi_offset lr, 16 +; CHECK-NEXT: clrldi r4, r4, 32 +; CHECK-NEXT: bl ldexp +; CHECK-NEXT: nop +; CHECK-NEXT: addi r1, r1, 32 +; CHECK-NEXT: ld r0, 16(r1) +; CHECK-NEXT: mtlr r0 +; CHECK-NEXT: blr + %result = call double @ldexp(double %a, i32 %b) + ret double %result +} + +define ppc_fp128 @call_ldexpl(ppc_fp128 %a, i32 %b) { +; CHECK-LABEL: call_ldexpl: +; CHECK: # %bb.0: +; CHECK-NEXT: mflr r0 +; CHECK-NEXT: stdu r1, -32(r1) +; CHECK-NEXT: std r0, 48(r1) +; CHECK-NEXT: .cfi_def_cfa_offset 32 +; CHECK-NEXT: .cfi_offset lr, 16 +; CHECK-NEXT: clrldi r5, r5, 32 +; CHECK-NEXT: bl ldexpl +; CHECK-NEXT: nop +; CHECK-NEXT: addi r1, r1, 32 +; CHECK-NEXT: ld r0, 16(r1) +; CHECK-NEXT: mtlr r0 +; CHECK-NEXT: blr + %result = call ppc_fp128 @ldexpl(ppc_fp128 %a, i32 %b) + ret ppc_fp128 %result +} + +declare float @ldexpf(float %a, i32 %b) #0 +declare double @ldexp(double %a, i32 %b) #0 +declare ppc_fp128 @ldexpl(ppc_fp128 %a, i32 %b) #0 + +attributes #0 = { nounwind readonly } Index: llvm/test/CodeGen/PowerPC/ldexp.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/PowerPC/ldexp.ll @@ -0,0 +1,223 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 +; RUN: llc -mcpu=pwr9 -mtriple=powerpc64le-unknown-unknown \ +; RUN: -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names < %s | FileCheck %s + +define float @ldexp_f32(i8 zeroext %x) { +; CHECK-LABEL: ldexp_f32: +; CHECK: # %bb.0: +; CHECK-NEXT: mflr r0 +; CHECK-NEXT: stdu r1, -32(r1) +; CHECK-NEXT: std r0, 48(r1) +; CHECK-NEXT: .cfi_def_cfa_offset 32 +; CHECK-NEXT: .cfi_offset lr, 16 +; CHECK-NEXT: vspltisw v2, 1 +; CHECK-NEXT: mr r4, r3 +; CHECK-NEXT: xvcvsxwdp vs1, v2 +; CHECK-NEXT: # kill: def $f1 killed $f1 killed $vsl1 +; CHECK-NEXT: bl ldexpf +; CHECK-NEXT: nop +; CHECK-NEXT: addi r1, r1, 32 +; CHECK-NEXT: ld r0, 16(r1) +; CHECK-NEXT: mtlr r0 +; CHECK-NEXT: blr + %zext = zext i8 %x to i32 + %ldexp = call float @llvm.ldexp.f32.i32(float 1.000000e+00, i32 %zext) + ret float %ldexp +} + +define double @ldexp_f64(i8 zeroext %x) { +; CHECK-LABEL: ldexp_f64: +; CHECK: # %bb.0: +; CHECK-NEXT: mflr r0 +; CHECK-NEXT: stdu r1, -32(r1) +; CHECK-NEXT: std r0, 48(r1) +; CHECK-NEXT: .cfi_def_cfa_offset 32 +; CHECK-NEXT: .cfi_offset lr, 16 +; CHECK-NEXT: vspltisw v2, 1 +; CHECK-NEXT: mr r4, r3 +; CHECK-NEXT: xvcvsxwdp vs1, v2 +; CHECK-NEXT: # kill: def $f1 killed $f1 killed $vsl1 +; CHECK-NEXT: bl ldexp +; CHECK-NEXT: nop +; CHECK-NEXT: addi r1, r1, 32 +; CHECK-NEXT: ld r0, 16(r1) +; CHECK-NEXT: mtlr r0 +; CHECK-NEXT: blr + %zext = zext i8 %x to i32 + %ldexp = call double @llvm.ldexp.f64.i32(double 1.000000e+00, i32 %zext) + ret double %ldexp +} + +define <2 x float> @ldexp_v2f32(<2 x float> %val, <2 x i32> %exp) { +; CHECK-LABEL: ldexp_v2f32: +; CHECK: # %bb.0: +; CHECK-NEXT: mflr r0 +; CHECK-NEXT: stdu r1, -96(r1) +; CHECK-NEXT: std r0, 112(r1) +; CHECK-NEXT: .cfi_def_cfa_offset 96 +; CHECK-NEXT: .cfi_offset lr, 16 +; CHECK-NEXT: .cfi_offset v28, -64 +; CHECK-NEXT: .cfi_offset v29, -48 +; CHECK-NEXT: .cfi_offset v30, -32 +; CHECK-NEXT: .cfi_offset 
v31, -16 +; CHECK-NEXT: li r3, 12 +; CHECK-NEXT: xscvspdpn f1, v2 +; CHECK-NEXT: stxv v28, 32(r1) # 16-byte Folded Spill +; CHECK-NEXT: stxv v29, 48(r1) # 16-byte Folded Spill +; CHECK-NEXT: stxv v30, 64(r1) # 16-byte Folded Spill +; CHECK-NEXT: stxv v31, 80(r1) # 16-byte Folded Spill +; CHECK-NEXT: vmr v31, v3 +; CHECK-NEXT: vmr v30, v2 +; CHECK-NEXT: vextuwrx r4, r3, v3 +; CHECK-NEXT: bl ldexpf +; CHECK-NEXT: nop +; CHECK-NEXT: xxswapd vs0, v30 +; CHECK-NEXT: li r3, 4 +; CHECK-NEXT: xscpsgndp v29, f1, f1 +; CHECK-NEXT: xscvspdpn f1, vs0 +; CHECK-NEXT: vextuwrx r4, r3, v31 +; CHECK-NEXT: bl ldexpf +; CHECK-NEXT: nop +; CHECK-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; CHECK-NEXT: xxmrghd vs0, v29, vs1 +; CHECK-NEXT: li r3, 0 +; CHECK-NEXT: vextuwrx r4, r3, v31 +; CHECK-NEXT: xvcvdpsp v28, vs0 +; CHECK-NEXT: xxsldwi vs0, v30, v30, 3 +; CHECK-NEXT: xscvspdpn f1, vs0 +; CHECK-NEXT: bl ldexpf +; CHECK-NEXT: nop +; CHECK-NEXT: xxsldwi vs0, v30, v30, 1 +; CHECK-NEXT: xscpsgndp v29, f1, f1 +; CHECK-NEXT: mfvsrwz r4, v31 +; CHECK-NEXT: xscvspdpn f1, vs0 +; CHECK-NEXT: bl ldexpf +; CHECK-NEXT: nop +; CHECK-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; CHECK-NEXT: xxmrghd vs0, vs1, v29 +; CHECK-NEXT: lxv v31, 80(r1) # 16-byte Folded Reload +; CHECK-NEXT: lxv v30, 64(r1) # 16-byte Folded Reload +; CHECK-NEXT: lxv v29, 48(r1) # 16-byte Folded Reload +; CHECK-NEXT: xvcvdpsp v2, vs0 +; CHECK-NEXT: vmrgew v2, v28, v2 +; CHECK-NEXT: lxv v28, 32(r1) # 16-byte Folded Reload +; CHECK-NEXT: addi r1, r1, 96 +; CHECK-NEXT: ld r0, 16(r1) +; CHECK-NEXT: mtlr r0 +; CHECK-NEXT: blr + %1 = call <2 x float> @llvm.ldexp.v2f32.v2i32(<2 x float> %val, <2 x i32> %exp) + ret <2 x float> %1 +} + +define <4 x float> @ldexp_v4f32(<4 x float> %val, <4 x i32> %exp) { +; CHECK-LABEL: ldexp_v4f32: +; CHECK: # %bb.0: +; CHECK-NEXT: mflr r0 +; CHECK-NEXT: stdu r1, -96(r1) +; CHECK-NEXT: std r0, 112(r1) +; CHECK-NEXT: .cfi_def_cfa_offset 96 +; CHECK-NEXT: .cfi_offset lr, 16 +; CHECK-NEXT: .cfi_offset v28, -64 +; CHECK-NEXT: .cfi_offset v29, -48 +; CHECK-NEXT: .cfi_offset v30, -32 +; CHECK-NEXT: .cfi_offset v31, -16 +; CHECK-NEXT: li r3, 12 +; CHECK-NEXT: xscvspdpn f1, v2 +; CHECK-NEXT: stxv v28, 32(r1) # 16-byte Folded Spill +; CHECK-NEXT: stxv v29, 48(r1) # 16-byte Folded Spill +; CHECK-NEXT: stxv v30, 64(r1) # 16-byte Folded Spill +; CHECK-NEXT: stxv v31, 80(r1) # 16-byte Folded Spill +; CHECK-NEXT: vmr v31, v3 +; CHECK-NEXT: vmr v30, v2 +; CHECK-NEXT: vextuwrx r4, r3, v3 +; CHECK-NEXT: bl ldexpf +; CHECK-NEXT: nop +; CHECK-NEXT: xxswapd vs0, v30 +; CHECK-NEXT: li r3, 4 +; CHECK-NEXT: xscpsgndp v29, f1, f1 +; CHECK-NEXT: xscvspdpn f1, vs0 +; CHECK-NEXT: vextuwrx r4, r3, v31 +; CHECK-NEXT: bl ldexpf +; CHECK-NEXT: nop +; CHECK-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; CHECK-NEXT: xxmrghd vs0, v29, vs1 +; CHECK-NEXT: li r3, 0 +; CHECK-NEXT: vextuwrx r4, r3, v31 +; CHECK-NEXT: xvcvdpsp v28, vs0 +; CHECK-NEXT: xxsldwi vs0, v30, v30, 3 +; CHECK-NEXT: xscvspdpn f1, vs0 +; CHECK-NEXT: bl ldexpf +; CHECK-NEXT: nop +; CHECK-NEXT: xxsldwi vs0, v30, v30, 1 +; CHECK-NEXT: xscpsgndp v29, f1, f1 +; CHECK-NEXT: mfvsrwz r4, v31 +; CHECK-NEXT: xscvspdpn f1, vs0 +; CHECK-NEXT: bl ldexpf +; CHECK-NEXT: nop +; CHECK-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; CHECK-NEXT: xxmrghd vs0, vs1, v29 +; CHECK-NEXT: lxv v31, 80(r1) # 16-byte Folded Reload +; CHECK-NEXT: lxv v30, 64(r1) # 16-byte Folded Reload +; CHECK-NEXT: lxv v29, 48(r1) # 16-byte Folded Reload +; CHECK-NEXT: xvcvdpsp v2, vs0 +; CHECK-NEXT: vmrgew v2, v28, v2 +; CHECK-NEXT: lxv 
v28, 32(r1) # 16-byte Folded Reload +; CHECK-NEXT: addi r1, r1, 96 +; CHECK-NEXT: ld r0, 16(r1) +; CHECK-NEXT: mtlr r0 +; CHECK-NEXT: blr + %1 = call <4 x float> @llvm.ldexp.v4f32.v4i32(<4 x float> %val, <4 x i32> %exp) + ret <4 x float> %1 +} + +define half @ldexp_f16(half %arg0, i32 %arg1) { +; CHECK-LABEL: ldexp_f16: +; CHECK: # %bb.0: +; CHECK-NEXT: mflr r0 +; CHECK-NEXT: stdu r1, -32(r1) +; CHECK-NEXT: std r0, 48(r1) +; CHECK-NEXT: .cfi_def_cfa_offset 32 +; CHECK-NEXT: .cfi_offset lr, 16 +; CHECK-NEXT: xscvdphp f0, f1 +; CHECK-NEXT: clrldi r4, r4, 32 +; CHECK-NEXT: mffprwz r3, f0 +; CHECK-NEXT: clrlwi r3, r3, 16 +; CHECK-NEXT: mtfprwz f0, r3 +; CHECK-NEXT: xscvhpdp f1, f0 +; CHECK-NEXT: bl ldexpf +; CHECK-NEXT: nop +; CHECK-NEXT: addi r1, r1, 32 +; CHECK-NEXT: ld r0, 16(r1) +; CHECK-NEXT: mtlr r0 +; CHECK-NEXT: blr + %ldexp = call half @llvm.ldexp.f16.i32(half %arg0, i32 %arg1) + ret half %ldexp +} + +define ppc_fp128 @ldexp_fp128(ppc_fp128 %arg0, i32 %arg1) { +; CHECK-LABEL: ldexp_fp128: +; CHECK: # %bb.0: +; CHECK-NEXT: mflr r0 +; CHECK-NEXT: stdu r1, -32(r1) +; CHECK-NEXT: std r0, 48(r1) +; CHECK-NEXT: .cfi_def_cfa_offset 32 +; CHECK-NEXT: .cfi_offset lr, 16 +; CHECK-NEXT: clrldi r5, r5, 32 +; CHECK-NEXT: bl ldexpl +; CHECK-NEXT: nop +; CHECK-NEXT: addi r1, r1, 32 +; CHECK-NEXT: ld r0, 16(r1) +; CHECK-NEXT: mtlr r0 +; CHECK-NEXT: blr + %ldexp = call ppc_fp128 @llvm.ldexp.ppcf128.i32(ppc_fp128 %arg0, i32 %arg1) + ret ppc_fp128 %ldexp +} + +declare double @llvm.ldexp.f64.i32(double, i32) #0 +declare float @llvm.ldexp.f32.i32(float, i32) #0 +declare <2 x float> @llvm.ldexp.v2f32.v2i32(<2 x float>, <2 x i32>) #0 +declare <4 x float> @llvm.ldexp.v4f32.v4i32(<4 x float>, <4 x i32>) #0 +declare half @llvm.ldexp.f16.i32(half, i32) #0 +declare ppc_fp128 @llvm.ldexp.ppcf128.i32(ppc_fp128, i32) #0 + +attributes #0 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } Index: llvm/test/CodeGen/X86/ldexp-f80.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/X86/ldexp-f80.ll @@ -0,0 +1,43 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 +; RUN: llc -mtriple=x86_64-unknown-unknown < %s | FileCheck -check-prefixes=X64 %s +; FIXME: Expansion without libcall +; XUN: llc -mtriple=i386-pc-win32 < %s | FileCheck -check-prefix=WIN32 %s + +define x86_fp80 @ldexp_f80(x86_fp80 %arg0, i32 %arg1) { +; X64-LABEL: ldexp_f80: +; X64: # %bb.0: +; X64-NEXT: subq $24, %rsp +; X64-NEXT: .cfi_def_cfa_offset 32 +; X64-NEXT: fldt {{[0-9]+}}(%rsp) +; X64-NEXT: fstpt (%rsp) +; X64-NEXT: callq ldexpl@PLT +; X64-NEXT: addq $24, %rsp +; X64-NEXT: .cfi_def_cfa_offset 8 +; X64-NEXT: retq + %ldexp = call x86_fp80 @llvm.ldexp.f80.i32(x86_fp80 %arg0, i32 %arg1) + ret x86_fp80 %ldexp +} + +define x86_fp80 @test_strict_ldexp_f80_i32(ptr addrspace(1) %out, x86_fp80 %a, i32 %b) #2 { +; X64-LABEL: test_strict_ldexp_f80_i32: +; X64: # %bb.0: +; X64-NEXT: subq $24, %rsp +; X64-NEXT: .cfi_def_cfa_offset 32 +; X64-NEXT: movl %esi, %edi +; X64-NEXT: fldt {{[0-9]+}}(%rsp) +; X64-NEXT: fstpt (%rsp) +; X64-NEXT: wait +; X64-NEXT: callq ldexpl@PLT +; X64-NEXT: addq $24, %rsp +; X64-NEXT: .cfi_def_cfa_offset 8 +; X64-NEXT: retq + %result = call x86_fp80 @llvm.experimental.constrained.ldexp.f80.i32(x86_fp80 %a, i32 %b, metadata !"round.dynamic", metadata !"fpexcept.strict") + ret x86_fp80 %result +} + +declare x86_fp80 @llvm.ldexp.f80.i32(x86_fp80, i32) +declare x86_fp80 
@llvm.experimental.constrained.ldexp.f80.i32(x86_fp80, i32, metadata, metadata) #1 + +attributes #0 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } +attributes #1 = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) } +attributes #2 = { strictfp } Index: llvm/test/CodeGen/X86/ldexp-libcall.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/X86/ldexp-libcall.ll @@ -0,0 +1,77 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s +; RUN: llc < %s -mtriple=i386-pc-win32 | FileCheck %s -check-prefix=CHECK-WIN + +define float @call_ldexpf(float %a, i32 %b) { +; CHECK-LABEL: call_ldexpf: +; CHECK: # %bb.0: +; CHECK-NEXT: jmp ldexpf@PLT # TAILCALL +; +; CHECK-WIN-LABEL: call_ldexpf: +; CHECK-WIN: # %bb.0: +; CHECK-WIN-NEXT: subl $8, %esp +; CHECK-WIN-NEXT: flds {{[0-9]+}}(%esp) +; CHECK-WIN-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-WIN-NEXT: movl %eax, {{[0-9]+}}(%esp) +; CHECK-WIN-NEXT: fstps (%esp) +; CHECK-WIN-NEXT: calll _ldexpf +; CHECK-WIN-NEXT: addl $8, %esp +; CHECK-WIN-NEXT: retl + %result = call float @ldexpf(float %a, i32 %b) + ret float %result +} + +define double @call_ldexp(double %a, i32 %b) { +; CHECK-LABEL: call_ldexp: +; CHECK: # %bb.0: +; CHECK-NEXT: jmp ldexp@PLT # TAILCALL +; +; CHECK-WIN-LABEL: call_ldexp: +; CHECK-WIN: # %bb.0: +; CHECK-WIN-NEXT: subl $12, %esp +; CHECK-WIN-NEXT: fldl {{[0-9]+}}(%esp) +; CHECK-WIN-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-WIN-NEXT: movl %eax, {{[0-9]+}}(%esp) +; CHECK-WIN-NEXT: fstpl (%esp) +; CHECK-WIN-NEXT: calll _ldexp +; CHECK-WIN-NEXT: addl $12, %esp +; CHECK-WIN-NEXT: retl + %result = call double @ldexp(double %a, i32 %b) + ret double %result +} + +define x86_fp80 @call_ldexpl(x86_fp80 %a, i32 %b) { +; CHECK-LABEL: call_ldexpl: +; CHECK: # %bb.0: +; CHECK-NEXT: subq $24, %rsp +; CHECK-NEXT: .cfi_def_cfa_offset 32 +; CHECK-NEXT: fldt {{[0-9]+}}(%rsp) +; CHECK-NEXT: fstpt (%rsp) +; CHECK-NEXT: callq ldexpl@PLT +; CHECK-NEXT: addq $24, %rsp +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: retq +; +; CHECK-WIN-LABEL: call_ldexpl: +; CHECK-WIN: # %bb.0: +; CHECK-WIN-NEXT: pushl %ebp +; CHECK-WIN-NEXT: movl %esp, %ebp +; CHECK-WIN-NEXT: andl $-16, %esp +; CHECK-WIN-NEXT: subl $48, %esp +; CHECK-WIN-NEXT: fldt 8(%ebp) +; CHECK-WIN-NEXT: movl 24(%ebp), %eax +; CHECK-WIN-NEXT: movl %eax, {{[0-9]+}}(%esp) +; CHECK-WIN-NEXT: fstpt (%esp) +; CHECK-WIN-NEXT: calll _ldexpl +; CHECK-WIN-NEXT: movl %ebp, %esp +; CHECK-WIN-NEXT: popl %ebp +; CHECK-WIN-NEXT: retl + %result = call x86_fp80 @ldexpl(x86_fp80 %a, i32 %b) + ret x86_fp80 %result +} + +declare float @ldexpf(float %a, i32 %b) #0 +declare double @ldexp(double %a, i32 %b) #0 +declare x86_fp80 @ldexpl(x86_fp80 %a, i32 %b) #0 + +attributes #0 = { nounwind readonly } Index: llvm/test/CodeGen/X86/ldexp-not-readonly.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/X86/ldexp-not-readonly.ll @@ -0,0 +1,54 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s +; RUN: llc < %s -mtriple=i386-pc-win32 | FileCheck %s -check-prefix=CHECK-WIN + +define float @call_ldexpf(float %a, i32 %b) { +; CHECK-LABEL: call_ldexpf: +; CHECK: # %bb.0: +; CHECK-NEXT: pushq %rax +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; 
CHECK-NEXT: callq ldexpf@PLT +; CHECK-NEXT: popq %rax +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: retq +; +; CHECK-WIN-LABEL: call_ldexpf: +; CHECK-WIN: # %bb.0: +; CHECK-WIN-NEXT: subl $8, %esp +; CHECK-WIN-NEXT: flds {{[0-9]+}}(%esp) +; CHECK-WIN-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-WIN-NEXT: movl %eax, {{[0-9]+}}(%esp) +; CHECK-WIN-NEXT: fstps (%esp) +; CHECK-WIN-NEXT: calll _ldexpf +; CHECK-WIN-NEXT: addl $8, %esp +; CHECK-WIN-NEXT: retl + %result = call float @ldexpf(float %a, i32 %b) + ret float %result +} + +define double @call_ldexp(double %a, i32 %b) { +; CHECK-LABEL: call_ldexp: +; CHECK: # %bb.0: +; CHECK-NEXT: pushq %rax +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: callq ldexp@PLT +; CHECK-NEXT: popq %rax +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: retq +; +; CHECK-WIN-LABEL: call_ldexp: +; CHECK-WIN: # %bb.0: +; CHECK-WIN-NEXT: subl $12, %esp +; CHECK-WIN-NEXT: fldl {{[0-9]+}}(%esp) +; CHECK-WIN-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-WIN-NEXT: movl %eax, {{[0-9]+}}(%esp) +; CHECK-WIN-NEXT: fstpl (%esp) +; CHECK-WIN-NEXT: calll _ldexp +; CHECK-WIN-NEXT: addl $12, %esp +; CHECK-WIN-NEXT: retl + %result = call double @ldexp(double %a, i32 %b) + ret double %result +} + +declare float @ldexpf(float %a, i32 %b) +declare double @ldexp(double %a, i32 %b) Index: llvm/test/CodeGen/X86/ldexp-strict.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/X86/ldexp-strict.ll @@ -0,0 +1,75 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 +; RUN: llc -mtriple=x86_64-unknown-unknown < %s | FileCheck -check-prefixes=X64 %s +; XUN: llc -mtriple=i386-pc-win32 < %s | FileCheck -check-prefix=WIN32 %s +; FIXME: Expansion support without libcalls + +; FIXME: Implement f16->f32 promotion for strictfp +; define half @test_strict_ldexp_f16_i32(ptr addrspace(1) %out, half %a, i32 %b) #2 { +; %result = call half @llvm.experimental.constrained.ldexp.f16.i32(half %a, i32 %b, metadata !"round.dynamic", metadata !"fpexcept.strict") +; ret half %result +; } + +define float @test_strict_ldexp_f32_i32(ptr addrspace(1) %out, float %a, i32 %b) #2 { +; X64-LABEL: test_strict_ldexp_f32_i32: +; X64: # %bb.0: +; X64-NEXT: pushq %rax +; X64-NEXT: .cfi_def_cfa_offset 16 +; X64-NEXT: movl %esi, %edi +; X64-NEXT: callq ldexpf@PLT +; X64-NEXT: popq %rax +; X64-NEXT: .cfi_def_cfa_offset 8 +; X64-NEXT: retq + %result = call float @llvm.experimental.constrained.ldexp.f32.i32(float %a, i32 %b, metadata !"round.dynamic", metadata !"fpexcept.strict") + ret float %result +} + +define double @test_strict_ldexp_f64_i32(ptr addrspace(1) %out, double %a, i32 %b) #2 { +; X64-LABEL: test_strict_ldexp_f64_i32: +; X64: # %bb.0: +; X64-NEXT: pushq %rax +; X64-NEXT: .cfi_def_cfa_offset 16 +; X64-NEXT: movl %esi, %edi +; X64-NEXT: callq ldexp@PLT +; X64-NEXT: popq %rax +; X64-NEXT: .cfi_def_cfa_offset 8 +; X64-NEXT: retq + %result = call double @llvm.experimental.constrained.ldexp.f64.i32(double %a, i32 %b, metadata !"round.dynamic", metadata !"fpexcept.strict") + ret double %result +} + + +define <2 x float> @test_strict_ldexp_v2f32_v2i32(ptr addrspace(1) %out, <2 x float> %a, <2 x i32> %b) #2 { +; X64-LABEL: test_strict_ldexp_v2f32_v2i32: +; X64: # %bb.0: +; X64-NEXT: subq $56, %rsp +; X64-NEXT: .cfi_def_cfa_offset 64 +; X64-NEXT: movdqa %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; X64-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill +; X64-NEXT: movd %xmm1, %edi +; X64-NEXT: callq ldexpf@PLT +; 
X64-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; X64-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload +; X64-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1] +; X64-NEXT: pshufd $85, {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload +; X64-NEXT: # xmm1 = mem[1,1,1,1] +; X64-NEXT: movd %xmm1, %edi +; X64-NEXT: callq ldexpf@PLT +; X64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload +; X64-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] +; X64-NEXT: movaps %xmm1, %xmm0 +; X64-NEXT: addq $56, %rsp +; X64-NEXT: .cfi_def_cfa_offset 8 +; X64-NEXT: retq + %result = call <2 x float> @llvm.experimental.constrained.ldexp.v2f32.v2i32(<2 x float> %a, <2 x i32> %b, metadata !"round.dynamic", metadata !"fpexcept.strict") + ret <2 x float> %result +} + +declare half @llvm.experimental.constrained.ldexp.f16.i32(half, i32, metadata, metadata) #1 +declare float @llvm.experimental.constrained.ldexp.f32.i32(float, i32, metadata, metadata) #1 +declare double @llvm.experimental.constrained.ldexp.f64.i32(double, i32, metadata, metadata) #1 +declare x86_fp80 @llvm.experimental.constrained.ldexp.f80.i32(x86_fp80, i32, metadata, metadata) #1 +declare <2 x float> @llvm.experimental.constrained.ldexp.v2f32.v2i32(<2 x float>, <2 x i32>, metadata, metadata) #1 + +attributes #0 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } +attributes #1 = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) } +attributes #2 = { strictfp } Index: llvm/test/CodeGen/X86/ldexp-wrong-signature.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/X86/ldexp-wrong-signature.ll @@ -0,0 +1,58 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s +; RUN: llc < %s -mtriple=i386-pc-win32 | FileCheck %s -check-prefix=CHECK-WIN + +define float @ldexpf_too_many_args(float %a, i32 %b, i32 %c) { +; CHECK-LABEL: ldexpf_too_many_args: +; CHECK: # %bb.0: +; CHECK-NEXT: pushq %rax +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: callq ldexpf@PLT +; CHECK-NEXT: popq %rax +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: retq +; +; CHECK-WIN-LABEL: ldexpf_too_many_args: +; CHECK-WIN: # %bb.0: +; CHECK-WIN-NEXT: subl $12, %esp +; CHECK-WIN-NEXT: flds {{[0-9]+}}(%esp) +; CHECK-WIN-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-WIN-NEXT: movl {{[0-9]+}}(%esp), %ecx +; CHECK-WIN-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; CHECK-WIN-NEXT: movl %eax, {{[0-9]+}}(%esp) +; CHECK-WIN-NEXT: fstps (%esp) +; CHECK-WIN-NEXT: calll _ldexpf +; CHECK-WIN-NEXT: addl $12, %esp +; CHECK-WIN-NEXT: retl + %result = call float @ldexpf(float %a, i32 %b, i32 %c) #0 + ret float %result +} + +define float @ldexp_wrong_fp_type(float %a, i32 %b) { +; CHECK-LABEL: ldexp_wrong_fp_type: +; CHECK: # %bb.0: +; CHECK-NEXT: pushq %rax +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: callq ldexp@PLT +; CHECK-NEXT: popq %rax +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: retq +; +; CHECK-WIN-LABEL: ldexp_wrong_fp_type: +; CHECK-WIN: # %bb.0: +; CHECK-WIN-NEXT: subl $8, %esp +; CHECK-WIN-NEXT: flds {{[0-9]+}}(%esp) +; CHECK-WIN-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-WIN-NEXT: movl %eax, {{[0-9]+}}(%esp) +; CHECK-WIN-NEXT: fstps (%esp) +; CHECK-WIN-NEXT: calll _ldexp +; CHECK-WIN-NEXT: addl $8, %esp +; CHECK-WIN-NEXT: retl + %result = call float @ldexp(float %a, i32 %b) #0 + ret float %result +} + +declare float 
@ldexpf(float, i32, i32) #0 +declare float @ldexp(float, i32) #0 + +attributes #0 = { nounwind readnone } Index: llvm/test/CodeGen/X86/ldexp-wrong-signature2.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/X86/ldexp-wrong-signature2.ll @@ -0,0 +1,53 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s +; RUN: llc < %s -mtriple=i386-pc-win32 | FileCheck %s -check-prefix=CHECK-WIN + +define i32 @ldexpf_not_fp(i32 %a, i32 %b) { +; CHECK-LABEL: ldexpf_not_fp: +; CHECK: # %bb.0: +; CHECK-NEXT: pushq %rax +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: callq ldexpf@PLT +; CHECK-NEXT: popq %rcx +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: retq +; +; CHECK-WIN-LABEL: ldexpf_not_fp: +; CHECK-WIN: # %bb.0: +; CHECK-WIN-NEXT: pushl {{[0-9]+}}(%esp) +; CHECK-WIN-NEXT: pushl {{[0-9]+}}(%esp) +; CHECK-WIN-NEXT: calll _ldexpf +; CHECK-WIN-NEXT: addl $8, %esp +; CHECK-WIN-NEXT: retl + %result = call i32 @ldexpf(i32 %a, i32 %b) #0 + ret i32 %result +} + +define float @ldexp_not_int(float %a, float %b) { +; CHECK-LABEL: ldexp_not_int: +; CHECK: # %bb.0: +; CHECK-NEXT: pushq %rax +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: callq ldexp@PLT +; CHECK-NEXT: popq %rax +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: retq +; +; CHECK-WIN-LABEL: ldexp_not_int: +; CHECK-WIN: # %bb.0: +; CHECK-WIN-NEXT: subl $8, %esp +; CHECK-WIN-NEXT: flds {{[0-9]+}}(%esp) +; CHECK-WIN-NEXT: flds {{[0-9]+}}(%esp) +; CHECK-WIN-NEXT: fstps {{[0-9]+}}(%esp) +; CHECK-WIN-NEXT: fstps (%esp) +; CHECK-WIN-NEXT: calll _ldexp +; CHECK-WIN-NEXT: addl $8, %esp +; CHECK-WIN-NEXT: retl + %result = call float @ldexp(float %a, float %b) #0 + ret float %result +} + +declare i32 @ldexpf(i32, i32) #0 +declare float @ldexp(float, float) #0 + +attributes #0 = { nounwind readnone } Index: llvm/test/CodeGen/X86/ldexp.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/X86/ldexp.ll @@ -0,0 +1,784 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 +; RUN: llc -mtriple=x86_64-unknown-unknown < %s | FileCheck -check-prefixes=X64 %s +; RUN: llc -mtriple=i386-pc-win32 < %s | FileCheck -check-prefix=WIN32 %s + +define float @ldexp_f32(i8 zeroext %x) { +; X64-LABEL: ldexp_f32: +; X64: # %bb.0: +; X64-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X64-NEXT: jmp ldexpf@PLT # TAILCALL +; +; WIN32-LABEL: ldexp_f32: +; WIN32: # %bb.0: +; WIN32-NEXT: pushl %eax +; WIN32-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; WIN32-NEXT: cmpl $381, %ecx # imm = 0x17D +; WIN32-NEXT: movl %ecx, %eax +; WIN32-NEXT: jl LBB0_2 +; WIN32-NEXT: # %bb.1: +; WIN32-NEXT: movl $381, %eax # imm = 0x17D +; WIN32-NEXT: LBB0_2: +; WIN32-NEXT: addl $-254, %eax +; WIN32-NEXT: leal -127(%ecx), %edx +; WIN32-NEXT: cmpl $255, %ecx +; WIN32-NEXT: jae LBB0_4 +; WIN32-NEXT: # %bb.3: +; WIN32-NEXT: movl %edx, %eax +; WIN32-NEXT: LBB0_4: +; WIN32-NEXT: flds __real@7f800000 +; WIN32-NEXT: flds __real@7f000000 +; WIN32-NEXT: jae LBB0_6 +; WIN32-NEXT: # %bb.5: +; WIN32-NEXT: fstp %st(1) +; WIN32-NEXT: fldz +; WIN32-NEXT: LBB0_6: +; WIN32-NEXT: fstp %st(0) +; WIN32-NEXT: cmpl $-329, %ecx # imm = 0xFEB7 +; WIN32-NEXT: movl %ecx, %edx +; WIN32-NEXT: jge LBB0_8 +; WIN32-NEXT: # %bb.7: +; WIN32-NEXT: movl $-330, %edx # imm = 0xFEB6 +; WIN32-NEXT: LBB0_8: +; WIN32-NEXT: cmpl $-228, %ecx +; WIN32-NEXT: fldz +; 
WIN32-NEXT: flds __real@0c800000 +; WIN32-NEXT: jb LBB0_9 +; WIN32-NEXT: # %bb.10: +; WIN32-NEXT: fstp %st(1) +; WIN32-NEXT: leal 102(%ecx), %edx +; WIN32-NEXT: cmpl $-126, %ecx +; WIN32-NEXT: jge LBB0_12 +; WIN32-NEXT: jmp LBB0_13 +; WIN32-NEXT: LBB0_9: +; WIN32-NEXT: fstp %st(0) +; WIN32-NEXT: addl $204, %edx +; WIN32-NEXT: cmpl $-126, %ecx +; WIN32-NEXT: jl LBB0_13 +; WIN32-NEXT: LBB0_12: +; WIN32-NEXT: movl %ecx, %edx +; WIN32-NEXT: LBB0_13: +; WIN32-NEXT: fld1 +; WIN32-NEXT: jl LBB0_15 +; WIN32-NEXT: # %bb.14: +; WIN32-NEXT: fstp %st(1) +; WIN32-NEXT: fldz +; WIN32-NEXT: LBB0_15: +; WIN32-NEXT: fstp %st(0) +; WIN32-NEXT: cmpl $127, %ecx +; WIN32-NEXT: jg LBB0_17 +; WIN32-NEXT: # %bb.16: +; WIN32-NEXT: fstp %st(1) +; WIN32-NEXT: movl %edx, %eax +; WIN32-NEXT: fldz +; WIN32-NEXT: LBB0_17: +; WIN32-NEXT: fstp %st(0) +; WIN32-NEXT: shll $23, %eax +; WIN32-NEXT: addl $1065353216, %eax # imm = 0x3F800000 +; WIN32-NEXT: movl %eax, (%esp) +; WIN32-NEXT: fmuls (%esp) +; WIN32-NEXT: popl %eax +; WIN32-NEXT: retl + %zext = zext i8 %x to i32 + %ldexp = call float @llvm.ldexp.f32.i32(float 1.000000e+00, i32 %zext) + ret float %ldexp +} + +define double @ldexp_f64(i8 zeroext %x) { +; X64-LABEL: ldexp_f64: +; X64: # %bb.0: +; X64-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; X64-NEXT: jmp ldexp@PLT # TAILCALL +; +; WIN32-LABEL: ldexp_f64: +; WIN32: # %bb.0: +; WIN32-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; WIN32-NEXT: pushl %eax +; WIN32-NEXT: pushl $1072693248 # imm = 0x3FF00000 +; WIN32-NEXT: pushl $0 +; WIN32-NEXT: calll _ldexp +; WIN32-NEXT: addl $12, %esp +; WIN32-NEXT: retl + %zext = zext i8 %x to i32 + %ldexp = call double @llvm.ldexp.f64.i32(double 1.000000e+00, i32 %zext) + ret double %ldexp +} + +define <2 x float> @ldexp_v2f32(<2 x float> %val, <2 x i32> %exp) { +; X64-LABEL: ldexp_v2f32: +; X64: # %bb.0: +; X64-NEXT: subq $72, %rsp +; X64-NEXT: .cfi_def_cfa_offset 80 +; X64-NEXT: movdqa %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; X64-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; X64-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,3,3,3] +; X64-NEXT: pshufd {{.*#+}} xmm2 = xmm1[3,3,3,3] +; X64-NEXT: movd %xmm2, %edi +; X64-NEXT: callq ldexpf@PLT +; X64-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill +; X64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; X64-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] +; X64-NEXT: pshufd $238, {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload +; X64-NEXT: # xmm1 = mem[2,3,2,3] +; X64-NEXT: movd %xmm1, %edi +; X64-NEXT: callq ldexpf@PLT +; X64-NEXT: unpcklps (%rsp), %xmm0 # 16-byte Folded Reload +; X64-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] +; X64-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill +; X64-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; X64-NEXT: movd %xmm0, %edi +; X64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; X64-NEXT: callq ldexpf@PLT +; X64-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; X64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; X64-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1] +; X64-NEXT: pshufd $85, {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload +; X64-NEXT: # xmm1 = mem[1,1,1,1] +; X64-NEXT: movd %xmm1, %edi +; X64-NEXT: callq ldexpf@PLT +; X64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload +; X64-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] +; X64-NEXT: unpcklpd (%rsp), %xmm1 # 16-byte Folded Reload +; X64-NEXT: # xmm1 = xmm1[0],mem[0] +; X64-NEXT: movaps %xmm1, %xmm0 +; X64-NEXT: addq $72, 
%rsp +; X64-NEXT: .cfi_def_cfa_offset 8 +; X64-NEXT: retq +; +; WIN32-LABEL: ldexp_v2f32: +; WIN32: # %bb.0: +; WIN32-NEXT: pushl %edi +; WIN32-NEXT: pushl %esi +; WIN32-NEXT: subl $8, %esp +; WIN32-NEXT: flds {{[0-9]+}}(%esp) +; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax +; WIN32-NEXT: cmpl $-329, %eax # imm = 0xFEB7 +; WIN32-NEXT: movl %eax, %edx +; WIN32-NEXT: jge LBB2_2 +; WIN32-NEXT: # %bb.1: +; WIN32-NEXT: movl $-330, %edx # imm = 0xFEB6 +; WIN32-NEXT: LBB2_2: +; WIN32-NEXT: addl $204, %edx +; WIN32-NEXT: leal 102(%eax), %ecx +; WIN32-NEXT: cmpl $-228, %eax +; WIN32-NEXT: jb LBB2_4 +; WIN32-NEXT: # %bb.3: +; WIN32-NEXT: movl %ecx, %edx +; WIN32-NEXT: LBB2_4: +; WIN32-NEXT: flds __real@0c800000 +; WIN32-NEXT: fld %st(1) +; WIN32-NEXT: fmul %st(1), %st +; WIN32-NEXT: fld %st(0) +; WIN32-NEXT: fmul %st(2), %st +; WIN32-NEXT: jb LBB2_6 +; WIN32-NEXT: # %bb.5: +; WIN32-NEXT: fstp %st(0) +; WIN32-NEXT: fldz +; WIN32-NEXT: fxch %st(1) +; WIN32-NEXT: LBB2_6: +; WIN32-NEXT: fstp %st(1) +; WIN32-NEXT: cmpl $-126, %eax +; WIN32-NEXT: jl LBB2_8 +; WIN32-NEXT: # %bb.7: +; WIN32-NEXT: fstp %st(0) +; WIN32-NEXT: fld %st(1) +; WIN32-NEXT: movl %eax, %edx +; WIN32-NEXT: LBB2_8: +; WIN32-NEXT: cmpl $381, %eax # imm = 0x17D +; WIN32-NEXT: movl %eax, %esi +; WIN32-NEXT: jl LBB2_10 +; WIN32-NEXT: # %bb.9: +; WIN32-NEXT: movl $381, %esi # imm = 0x17D +; WIN32-NEXT: LBB2_10: +; WIN32-NEXT: flds __real@7f000000 +; WIN32-NEXT: fmul %st, %st(3) +; WIN32-NEXT: fld %st(3) +; WIN32-NEXT: fmul %st(1), %st +; WIN32-NEXT: leal -127(%eax), %ecx +; WIN32-NEXT: cmpl $255, %eax +; WIN32-NEXT: jae LBB2_11 +; WIN32-NEXT: # %bb.12: +; WIN32-NEXT: fstp %st(0) +; WIN32-NEXT: jmp LBB2_13 +; WIN32-NEXT: LBB2_11: +; WIN32-NEXT: fstp %st(4) +; WIN32-NEXT: addl $-254, %esi +; WIN32-NEXT: movl %esi, %ecx +; WIN32-NEXT: LBB2_13: +; WIN32-NEXT: cmpl $127, %eax +; WIN32-NEXT: flds {{[0-9]+}}(%esp) +; WIN32-NEXT: movl {{[0-9]+}}(%esp), %esi +; WIN32-NEXT: jg LBB2_15 +; WIN32-NEXT: # %bb.14: +; WIN32-NEXT: movl %edx, %ecx +; WIN32-NEXT: LBB2_15: +; WIN32-NEXT: cmpl $381, %esi # imm = 0x17D +; WIN32-NEXT: movl %esi, %edx +; WIN32-NEXT: jl LBB2_17 +; WIN32-NEXT: # %bb.16: +; WIN32-NEXT: movl $381, %edx # imm = 0x17D +; WIN32-NEXT: LBB2_17: +; WIN32-NEXT: addl $-254, %edx +; WIN32-NEXT: leal -127(%esi), %edi +; WIN32-NEXT: cmpl $255, %esi +; WIN32-NEXT: jae LBB2_19 +; WIN32-NEXT: # %bb.18: +; WIN32-NEXT: movl %edi, %edx +; WIN32-NEXT: LBB2_19: +; WIN32-NEXT: fld %st(0) +; WIN32-NEXT: fmul %st(2), %st +; WIN32-NEXT: fmul %st, %st(2) +; WIN32-NEXT: jae LBB2_21 +; WIN32-NEXT: # %bb.20: +; WIN32-NEXT: fstp %st(2) +; WIN32-NEXT: fldz +; WIN32-NEXT: LBB2_21: +; WIN32-NEXT: fstp %st(0) +; WIN32-NEXT: cmpl $-329, %esi # imm = 0xFEB7 +; WIN32-NEXT: movl %esi, %edi +; WIN32-NEXT: jge LBB2_23 +; WIN32-NEXT: # %bb.22: +; WIN32-NEXT: movl $-330, %edi # imm = 0xFEB6 +; WIN32-NEXT: LBB2_23: +; WIN32-NEXT: fld %st(0) +; WIN32-NEXT: fmul %st(4), %st +; WIN32-NEXT: fmul %st, %st(4) +; WIN32-NEXT: cmpl $-228, %esi +; WIN32-NEXT: jb LBB2_24 +; WIN32-NEXT: # %bb.25: +; WIN32-NEXT: fstp %st(4) +; WIN32-NEXT: leal 102(%esi), %edi +; WIN32-NEXT: cmpl $-126, %esi +; WIN32-NEXT: jge LBB2_27 +; WIN32-NEXT: jmp LBB2_28 +; WIN32-NEXT: LBB2_24: +; WIN32-NEXT: fstp %st(0) +; WIN32-NEXT: addl $204, %edi +; WIN32-NEXT: cmpl $-126, %esi +; WIN32-NEXT: jl LBB2_28 +; WIN32-NEXT: LBB2_27: +; WIN32-NEXT: fstp %st(3) +; WIN32-NEXT: movl %esi, %edi +; WIN32-NEXT: fldz +; WIN32-NEXT: LBB2_28: +; WIN32-NEXT: fstp %st(0) +; WIN32-NEXT: cmpl $127, %esi +; WIN32-NEXT: jg LBB2_30 +; 
WIN32-NEXT: # %bb.29: +; WIN32-NEXT: fstp %st(0) +; WIN32-NEXT: movl %edi, %edx +; WIN32-NEXT: fldz +; WIN32-NEXT: fxch %st(2) +; WIN32-NEXT: LBB2_30: +; WIN32-NEXT: fstp %st(2) +; WIN32-NEXT: cmpl $127, %eax +; WIN32-NEXT: jg LBB2_32 +; WIN32-NEXT: # %bb.31: +; WIN32-NEXT: fstp %st(2) +; WIN32-NEXT: fldz +; WIN32-NEXT: LBB2_32: +; WIN32-NEXT: fstp %st(0) +; WIN32-NEXT: shll $23, %ecx +; WIN32-NEXT: addl $1065353216, %ecx # imm = 0x3F800000 +; WIN32-NEXT: movl %ecx, (%esp) +; WIN32-NEXT: shll $23, %edx +; WIN32-NEXT: addl $1065353216, %edx # imm = 0x3F800000 +; WIN32-NEXT: movl %edx, {{[0-9]+}}(%esp) +; WIN32-NEXT: fxch %st(1) +; WIN32-NEXT: fmuls (%esp) +; WIN32-NEXT: fxch %st(1) +; WIN32-NEXT: fmuls {{[0-9]+}}(%esp) +; WIN32-NEXT: addl $8, %esp +; WIN32-NEXT: popl %esi +; WIN32-NEXT: popl %edi +; WIN32-NEXT: retl + %1 = call <2 x float> @llvm.ldexp.v2f32.v2i32(<2 x float> %val, <2 x i32> %exp) + ret <2 x float> %1 +} + +define <4 x float> @ldexp_v4f32(<4 x float> %val, <4 x i32> %exp) { +; X64-LABEL: ldexp_v4f32: +; X64: # %bb.0: +; X64-NEXT: subq $72, %rsp +; X64-NEXT: .cfi_def_cfa_offset 80 +; X64-NEXT: movdqa %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; X64-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; X64-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,3,3,3] +; X64-NEXT: pshufd {{.*#+}} xmm2 = xmm1[3,3,3,3] +; X64-NEXT: movd %xmm2, %edi +; X64-NEXT: callq ldexpf@PLT +; X64-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill +; X64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; X64-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] +; X64-NEXT: pshufd $238, {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload +; X64-NEXT: # xmm1 = mem[2,3,2,3] +; X64-NEXT: movd %xmm1, %edi +; X64-NEXT: callq ldexpf@PLT +; X64-NEXT: unpcklps (%rsp), %xmm0 # 16-byte Folded Reload +; X64-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] +; X64-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill +; X64-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; X64-NEXT: movd %xmm0, %edi +; X64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; X64-NEXT: callq ldexpf@PLT +; X64-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; X64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; X64-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1] +; X64-NEXT: pshufd $85, {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload +; X64-NEXT: # xmm1 = mem[1,1,1,1] +; X64-NEXT: movd %xmm1, %edi +; X64-NEXT: callq ldexpf@PLT +; X64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload +; X64-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] +; X64-NEXT: unpcklpd (%rsp), %xmm1 # 16-byte Folded Reload +; X64-NEXT: # xmm1 = xmm1[0],mem[0] +; X64-NEXT: movaps %xmm1, %xmm0 +; X64-NEXT: addq $72, %rsp +; X64-NEXT: .cfi_def_cfa_offset 8 +; X64-NEXT: retq +; +; WIN32-LABEL: ldexp_v4f32: +; WIN32: # %bb.0: +; WIN32-NEXT: pushl %ebp +; WIN32-NEXT: pushl %ebx +; WIN32-NEXT: pushl %edi +; WIN32-NEXT: pushl %esi +; WIN32-NEXT: subl $32, %esp +; WIN32-NEXT: flds {{[0-9]+}}(%esp) +; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ecx +; WIN32-NEXT: flds {{[0-9]+}}(%esp) +; WIN32-NEXT: flds __real@7f000000 +; WIN32-NEXT: fld %st(1) +; WIN32-NEXT: fmul %st(1), %st +; WIN32-NEXT: fld %st(0) +; WIN32-NEXT: fmul %st(2), %st +; WIN32-NEXT: cmpl $255, %ecx +; WIN32-NEXT: jae LBB3_2 +; WIN32-NEXT: # %bb.1: +; WIN32-NEXT: fstp %st(0) +; WIN32-NEXT: fldz +; WIN32-NEXT: fxch %st(1) +; WIN32-NEXT: LBB3_2: +; WIN32-NEXT: fstp %st(1) +; WIN32-NEXT: cmpl $-329, %ecx # imm = 0xFEB7 +; WIN32-NEXT: movl 
%ecx, %esi +; WIN32-NEXT: jge LBB3_4 +; WIN32-NEXT: # %bb.3: +; WIN32-NEXT: movl $-330, %esi # imm = 0xFEB6 +; WIN32-NEXT: LBB3_4: +; WIN32-NEXT: addl $204, %esi +; WIN32-NEXT: leal 102(%ecx), %eax +; WIN32-NEXT: cmpl $-228, %ecx +; WIN32-NEXT: jb LBB3_6 +; WIN32-NEXT: # %bb.5: +; WIN32-NEXT: movl %eax, %esi +; WIN32-NEXT: LBB3_6: +; WIN32-NEXT: flds __real@0c800000 +; WIN32-NEXT: fld %st(3) +; WIN32-NEXT: fmul %st(1), %st +; WIN32-NEXT: fld %st(0) +; WIN32-NEXT: fmul %st(2), %st +; WIN32-NEXT: jb LBB3_8 +; WIN32-NEXT: # %bb.7: +; WIN32-NEXT: fstp %st(0) +; WIN32-NEXT: fldz +; WIN32-NEXT: fxch %st(1) +; WIN32-NEXT: LBB3_8: +; WIN32-NEXT: fstp %st(1) +; WIN32-NEXT: cmpl $-126, %ecx +; WIN32-NEXT: jl LBB3_10 +; WIN32-NEXT: # %bb.9: +; WIN32-NEXT: fstp %st(0) +; WIN32-NEXT: fldz +; WIN32-NEXT: fxch %st(4) +; WIN32-NEXT: LBB3_10: +; WIN32-NEXT: fstp %st(4) +; WIN32-NEXT: movl {{[0-9]+}}(%esp), %edi +; WIN32-NEXT: movl %ecx, %edx +; WIN32-NEXT: subl $127, %edx +; WIN32-NEXT: jg LBB3_12 +; WIN32-NEXT: # %bb.11: +; WIN32-NEXT: fstp %st(1) +; WIN32-NEXT: fldz +; WIN32-NEXT: fxch %st(3) +; WIN32-NEXT: fxch %st(1) +; WIN32-NEXT: LBB3_12: +; WIN32-NEXT: fstp %st(3) +; WIN32-NEXT: fld %st(3) +; WIN32-NEXT: fmul %st(2), %st +; WIN32-NEXT: fld %st(0) +; WIN32-NEXT: fmul %st(3), %st +; WIN32-NEXT: cmpl $255, %edi +; WIN32-NEXT: jae LBB3_14 +; WIN32-NEXT: # %bb.13: +; WIN32-NEXT: fstp %st(0) +; WIN32-NEXT: fldz +; WIN32-NEXT: fxch %st(1) +; WIN32-NEXT: LBB3_14: +; WIN32-NEXT: fstp %st(1) +; WIN32-NEXT: fxch %st(1) +; WIN32-NEXT: fstps {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; WIN32-NEXT: cmpl $-329, %edi # imm = 0xFEB7 +; WIN32-NEXT: movl %edi, %eax +; WIN32-NEXT: jge LBB3_16 +; WIN32-NEXT: # %bb.15: +; WIN32-NEXT: movl $-330, %eax # imm = 0xFEB6 +; WIN32-NEXT: LBB3_16: +; WIN32-NEXT: fld %st(3) +; WIN32-NEXT: fmul %st(3), %st +; WIN32-NEXT: fld %st(0) +; WIN32-NEXT: fmul %st(4), %st +; WIN32-NEXT: cmpl $-228, %edi +; WIN32-NEXT: jb LBB3_17 +; WIN32-NEXT: # %bb.18: +; WIN32-NEXT: fstp %st(0) +; WIN32-NEXT: leal 102(%edi), %eax +; WIN32-NEXT: cmpl $-126, %edi +; WIN32-NEXT: jge LBB3_20 +; WIN32-NEXT: jmp LBB3_21 +; WIN32-NEXT: LBB3_17: +; WIN32-NEXT: fstp %st(1) +; WIN32-NEXT: addl $204, %eax +; WIN32-NEXT: cmpl $-126, %edi +; WIN32-NEXT: jl LBB3_21 +; WIN32-NEXT: LBB3_20: +; WIN32-NEXT: fstp %st(0) +; WIN32-NEXT: movl %edi, %eax +; WIN32-NEXT: fldz +; WIN32-NEXT: fxch %st(4) +; WIN32-NEXT: LBB3_21: +; WIN32-NEXT: fstp %st(4) +; WIN32-NEXT: movl %eax, (%esp) # 4-byte Spill +; WIN32-NEXT: movl %edi, %ebx +; WIN32-NEXT: subl $127, %ebx +; WIN32-NEXT: jg LBB3_23 +; WIN32-NEXT: # %bb.22: +; WIN32-NEXT: fstp %st(0) +; WIN32-NEXT: fldz +; WIN32-NEXT: fxch %st(3) +; WIN32-NEXT: LBB3_23: +; WIN32-NEXT: fstp %st(3) +; WIN32-NEXT: cmpl $381, %edi # imm = 0x17D +; WIN32-NEXT: movl %edi, %eax +; WIN32-NEXT: jge LBB3_24 +; WIN32-NEXT: # %bb.25: +; WIN32-NEXT: cmpl $255, %edi +; WIN32-NEXT: jae LBB3_26 +; WIN32-NEXT: LBB3_27: +; WIN32-NEXT: cmpl $-126, %ecx +; WIN32-NEXT: jl LBB3_29 +; WIN32-NEXT: LBB3_28: +; WIN32-NEXT: movl %ecx, %esi +; WIN32-NEXT: LBB3_29: +; WIN32-NEXT: cmpl $381, %ecx # imm = 0x17D +; WIN32-NEXT: movl %ecx, %eax +; WIN32-NEXT: jl LBB3_31 +; WIN32-NEXT: # %bb.30: +; WIN32-NEXT: movl $381, %eax # imm = 0x17D +; WIN32-NEXT: LBB3_31: +; WIN32-NEXT: cmpl $255, %ecx +; WIN32-NEXT: flds {{[0-9]+}}(%esp) +; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ebp +; WIN32-NEXT: jb LBB3_33 +; WIN32-NEXT: # %bb.32: +; WIN32-NEXT: addl $-254, %eax +; WIN32-NEXT: movl %eax, %edx +; WIN32-NEXT: LBB3_33: +; WIN32-NEXT: 
fxch %st(3) +; WIN32-NEXT: fstps {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; WIN32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; WIN32-NEXT: cmpl $381, %ebp # imm = 0x17D +; WIN32-NEXT: movl %ebp, %eax +; WIN32-NEXT: jl LBB3_35 +; WIN32-NEXT: # %bb.34: +; WIN32-NEXT: movl $381, %eax # imm = 0x17D +; WIN32-NEXT: LBB3_35: +; WIN32-NEXT: fld %st(2) +; WIN32-NEXT: fmul %st(1), %st +; WIN32-NEXT: fld %st(0) +; WIN32-NEXT: fmul %st(2), %st +; WIN32-NEXT: leal -127(%ebp), %edi +; WIN32-NEXT: cmpl $255, %ebp +; WIN32-NEXT: jae LBB3_36 +; WIN32-NEXT: # %bb.37: +; WIN32-NEXT: fstp %st(0) +; WIN32-NEXT: jmp LBB3_38 +; WIN32-NEXT: LBB3_24: +; WIN32-NEXT: movl $381, %eax # imm = 0x17D +; WIN32-NEXT: cmpl $255, %edi +; WIN32-NEXT: jb LBB3_27 +; WIN32-NEXT: LBB3_26: +; WIN32-NEXT: addl $-254, %eax +; WIN32-NEXT: movl %eax, %ebx +; WIN32-NEXT: cmpl $-126, %ecx +; WIN32-NEXT: jge LBB3_28 +; WIN32-NEXT: jmp LBB3_29 +; WIN32-NEXT: LBB3_36: +; WIN32-NEXT: fstp %st(1) +; WIN32-NEXT: addl $-254, %eax +; WIN32-NEXT: movl %eax, %edi +; WIN32-NEXT: LBB3_38: +; WIN32-NEXT: cmpl $-329, %ebp # imm = 0xFEB7 +; WIN32-NEXT: movl %ebp, %ecx +; WIN32-NEXT: jge LBB3_40 +; WIN32-NEXT: # %bb.39: +; WIN32-NEXT: movl $-330, %ecx # imm = 0xFEB6 +; WIN32-NEXT: LBB3_40: +; WIN32-NEXT: addl $204, %ecx +; WIN32-NEXT: leal 102(%ebp), %eax +; WIN32-NEXT: cmpl $-228, %ebp +; WIN32-NEXT: jb LBB3_42 +; WIN32-NEXT: # %bb.41: +; WIN32-NEXT: movl %eax, %ecx +; WIN32-NEXT: LBB3_42: +; WIN32-NEXT: fld %st(3) +; WIN32-NEXT: fmul %st(3), %st +; WIN32-NEXT: fld %st(0) +; WIN32-NEXT: fmul %st(4), %st +; WIN32-NEXT: jb LBB3_44 +; WIN32-NEXT: # %bb.43: +; WIN32-NEXT: fstp %st(0) +; WIN32-NEXT: fldz +; WIN32-NEXT: fxch %st(1) +; WIN32-NEXT: LBB3_44: +; WIN32-NEXT: fstp %st(1) +; WIN32-NEXT: cmpl $-126, %ebp +; WIN32-NEXT: jl LBB3_46 +; WIN32-NEXT: # %bb.45: +; WIN32-NEXT: fstp %st(0) +; WIN32-NEXT: movl %ebp, %ecx +; WIN32-NEXT: fldz +; WIN32-NEXT: fxch %st(4) +; WIN32-NEXT: LBB3_46: +; WIN32-NEXT: fstp %st(4) +; WIN32-NEXT: cmpl $127, %ebp +; WIN32-NEXT: flds {{[0-9]+}}(%esp) +; WIN32-NEXT: movl {{[0-9]+}}(%esp), %esi +; WIN32-NEXT: jg LBB3_48 +; WIN32-NEXT: # %bb.47: +; WIN32-NEXT: movl %ecx, %edi +; WIN32-NEXT: LBB3_48: +; WIN32-NEXT: cmpl $381, %esi # imm = 0x17D +; WIN32-NEXT: movl %esi, %ecx +; WIN32-NEXT: jl LBB3_50 +; WIN32-NEXT: # %bb.49: +; WIN32-NEXT: movl $381, %ecx # imm = 0x17D +; WIN32-NEXT: LBB3_50: +; WIN32-NEXT: addl $-254, %ecx +; WIN32-NEXT: leal -127(%esi), %eax +; WIN32-NEXT: cmpl $255, %esi +; WIN32-NEXT: jae LBB3_52 +; WIN32-NEXT: # %bb.51: +; WIN32-NEXT: movl %eax, %ecx +; WIN32-NEXT: LBB3_52: +; WIN32-NEXT: fld %st(0) +; WIN32-NEXT: fmul %st(3), %st +; WIN32-NEXT: fmul %st, %st(3) +; WIN32-NEXT: jae LBB3_54 +; WIN32-NEXT: # %bb.53: +; WIN32-NEXT: fstp %st(3) +; WIN32-NEXT: fldz +; WIN32-NEXT: LBB3_54: +; WIN32-NEXT: fstp %st(0) +; WIN32-NEXT: cmpl $-329, %esi # imm = 0xFEB7 +; WIN32-NEXT: movl %esi, %eax +; WIN32-NEXT: jge LBB3_56 +; WIN32-NEXT: # %bb.55: +; WIN32-NEXT: movl $-330, %eax # imm = 0xFEB6 +; WIN32-NEXT: LBB3_56: +; WIN32-NEXT: fld %st(0) +; WIN32-NEXT: fmul %st(4), %st +; WIN32-NEXT: fmul %st, %st(4) +; WIN32-NEXT: cmpl $-228, %esi +; WIN32-NEXT: jb LBB3_57 +; WIN32-NEXT: # %bb.58: +; WIN32-NEXT: fstp %st(4) +; WIN32-NEXT: leal 102(%esi), %eax +; WIN32-NEXT: cmpl $-126, %esi +; WIN32-NEXT: jge LBB3_60 +; WIN32-NEXT: jmp LBB3_61 +; WIN32-NEXT: LBB3_57: +; WIN32-NEXT: fstp %st(0) +; WIN32-NEXT: addl $204, %eax +; WIN32-NEXT: cmpl $-126, %esi +; WIN32-NEXT: jl LBB3_61 +; WIN32-NEXT: LBB3_60: 
+; WIN32-NEXT: fstp %st(3) +; WIN32-NEXT: movl %esi, %eax +; WIN32-NEXT: fldz +; WIN32-NEXT: LBB3_61: +; WIN32-NEXT: fstp %st(0) +; WIN32-NEXT: cmpl $127, %esi +; WIN32-NEXT: jg LBB3_63 +; WIN32-NEXT: # %bb.62: +; WIN32-NEXT: fstp %st(1) +; WIN32-NEXT: movl %eax, %ecx +; WIN32-NEXT: fldz +; WIN32-NEXT: fxch %st(2) +; WIN32-NEXT: fxch %st(1) +; WIN32-NEXT: LBB3_63: +; WIN32-NEXT: fstp %st(2) +; WIN32-NEXT: cmpl $127, {{[0-9]+}}(%esp) +; WIN32-NEXT: jg LBB3_65 +; WIN32-NEXT: # %bb.64: +; WIN32-NEXT: movl (%esp), %ebx # 4-byte Reload +; WIN32-NEXT: LBB3_65: +; WIN32-NEXT: cmpl $127, {{[0-9]+}}(%esp) +; WIN32-NEXT: flds {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload +; WIN32-NEXT: jg LBB3_67 +; WIN32-NEXT: # %bb.66: +; WIN32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; WIN32-NEXT: LBB3_67: +; WIN32-NEXT: cmpl $127, %ebp +; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax +; WIN32-NEXT: jg LBB3_69 +; WIN32-NEXT: # %bb.68: +; WIN32-NEXT: fstp %st(2) +; WIN32-NEXT: fldz +; WIN32-NEXT: fxch %st(3) +; WIN32-NEXT: fxch %st(2) +; WIN32-NEXT: LBB3_69: +; WIN32-NEXT: fstp %st(3) +; WIN32-NEXT: shll $23, %edi +; WIN32-NEXT: addl $1065353216, %edi # imm = 0x3F800000 +; WIN32-NEXT: movl %edi, {{[0-9]+}}(%esp) +; WIN32-NEXT: shll $23, %ecx +; WIN32-NEXT: addl $1065353216, %ecx # imm = 0x3F800000 +; WIN32-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; WIN32-NEXT: shll $23, %ebx +; WIN32-NEXT: addl $1065353216, %ebx # imm = 0x3F800000 +; WIN32-NEXT: movl %ebx, {{[0-9]+}}(%esp) +; WIN32-NEXT: shll $23, %edx +; WIN32-NEXT: addl $1065353216, %edx # imm = 0x3F800000 +; WIN32-NEXT: movl %edx, {{[0-9]+}}(%esp) +; WIN32-NEXT: fxch %st(1) +; WIN32-NEXT: fmuls {{[0-9]+}}(%esp) +; WIN32-NEXT: fxch %st(1) +; WIN32-NEXT: fmuls {{[0-9]+}}(%esp) +; WIN32-NEXT: flds {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload +; WIN32-NEXT: fmuls {{[0-9]+}}(%esp) +; WIN32-NEXT: fxch %st(3) +; WIN32-NEXT: fmuls {{[0-9]+}}(%esp) +; WIN32-NEXT: fstps 12(%eax) +; WIN32-NEXT: fxch %st(2) +; WIN32-NEXT: fstps 8(%eax) +; WIN32-NEXT: fxch %st(1) +; WIN32-NEXT: fstps 4(%eax) +; WIN32-NEXT: fstps (%eax) +; WIN32-NEXT: addl $32, %esp +; WIN32-NEXT: popl %esi +; WIN32-NEXT: popl %edi +; WIN32-NEXT: popl %ebx +; WIN32-NEXT: popl %ebp +; WIN32-NEXT: retl + %1 = call <4 x float> @llvm.ldexp.v4f32.v4i32(<4 x float> %val, <4 x i32> %exp) + ret <4 x float> %1 +} + +define half @ldexp_f16(half %arg0, i32 %arg1) { +; X64-LABEL: ldexp_f16: +; X64: # %bb.0: +; X64-NEXT: pushq %rbx +; X64-NEXT: .cfi_def_cfa_offset 16 +; X64-NEXT: .cfi_offset %rbx, -16 +; X64-NEXT: movl %edi, %ebx +; X64-NEXT: callq __extendhfsf2@PLT +; X64-NEXT: movl %ebx, %edi +; X64-NEXT: callq ldexpf@PLT +; X64-NEXT: callq __truncsfhf2@PLT +; X64-NEXT: popq %rbx +; X64-NEXT: .cfi_def_cfa_offset 8 +; X64-NEXT: retq +; +; WIN32-LABEL: ldexp_f16: +; WIN32: # %bb.0: +; WIN32-NEXT: pushl %edi +; WIN32-NEXT: pushl %esi +; WIN32-NEXT: subl $8, %esp +; WIN32-NEXT: movl {{[0-9]+}}(%esp), %edi +; WIN32-NEXT: movzwl {{[0-9]+}}(%esp), %eax +; WIN32-NEXT: movl %eax, (%esp) +; WIN32-NEXT: cmpl $381, %edi # imm = 0x17D +; WIN32-NEXT: movl %edi, %esi +; WIN32-NEXT: jl LBB4_2 +; WIN32-NEXT: # %bb.1: +; WIN32-NEXT: movl $381, %esi # imm = 0x17D +; WIN32-NEXT: LBB4_2: +; WIN32-NEXT: addl $-254, %esi +; WIN32-NEXT: calll ___gnu_h2f_ieee +; WIN32-NEXT: leal -127(%edi), %eax +; WIN32-NEXT: cmpl $255, %edi +; WIN32-NEXT: jae LBB4_4 +; WIN32-NEXT: # %bb.3: +; WIN32-NEXT: movl %eax, %esi +; WIN32-NEXT: LBB4_4: +; WIN32-NEXT: flds __real@7f000000 +; WIN32-NEXT: fld %st(1) +; WIN32-NEXT: fmul %st(1), %st +; 
WIN32-NEXT: fmul %st, %st(1) +; WIN32-NEXT: jae LBB4_6 +; WIN32-NEXT: # %bb.5: +; WIN32-NEXT: fstp %st(1) +; WIN32-NEXT: fldz +; WIN32-NEXT: LBB4_6: +; WIN32-NEXT: fstp %st(0) +; WIN32-NEXT: cmpl $-329, %edi # imm = 0xFEB7 +; WIN32-NEXT: movl %edi, %eax +; WIN32-NEXT: jge LBB4_8 +; WIN32-NEXT: # %bb.7: +; WIN32-NEXT: movl $-330, %eax # imm = 0xFEB6 +; WIN32-NEXT: LBB4_8: +; WIN32-NEXT: flds __real@0c800000 +; WIN32-NEXT: fld %st(2) +; WIN32-NEXT: fmul %st(1), %st +; WIN32-NEXT: fmul %st, %st(1) +; WIN32-NEXT: cmpl $-228, %edi +; WIN32-NEXT: jb LBB4_9 +; WIN32-NEXT: # %bb.10: +; WIN32-NEXT: fstp %st(1) +; WIN32-NEXT: leal 102(%edi), %eax +; WIN32-NEXT: cmpl $-126, %edi +; WIN32-NEXT: jge LBB4_12 +; WIN32-NEXT: jmp LBB4_13 +; WIN32-NEXT: LBB4_9: +; WIN32-NEXT: fstp %st(0) +; WIN32-NEXT: addl $204, %eax +; WIN32-NEXT: cmpl $-126, %edi +; WIN32-NEXT: jl LBB4_13 +; WIN32-NEXT: LBB4_12: +; WIN32-NEXT: fstp %st(0) +; WIN32-NEXT: movl %edi, %eax +; WIN32-NEXT: fldz +; WIN32-NEXT: fxch %st(2) +; WIN32-NEXT: LBB4_13: +; WIN32-NEXT: fstp %st(2) +; WIN32-NEXT: cmpl $127, %edi +; WIN32-NEXT: jg LBB4_15 +; WIN32-NEXT: # %bb.14: +; WIN32-NEXT: fstp %st(0) +; WIN32-NEXT: movl %eax, %esi +; WIN32-NEXT: fldz +; WIN32-NEXT: fxch %st(1) +; WIN32-NEXT: LBB4_15: +; WIN32-NEXT: fstp %st(1) +; WIN32-NEXT: shll $23, %esi +; WIN32-NEXT: addl $1065353216, %esi # imm = 0x3F800000 +; WIN32-NEXT: movl %esi, {{[0-9]+}}(%esp) +; WIN32-NEXT: fmuls {{[0-9]+}}(%esp) +; WIN32-NEXT: fstps (%esp) +; WIN32-NEXT: calll ___gnu_f2h_ieee +; WIN32-NEXT: addl $8, %esp +; WIN32-NEXT: popl %esi +; WIN32-NEXT: popl %edi +; WIN32-NEXT: retl + %ldexp = call half @llvm.ldexp.f16.i32(half %arg0, i32 %arg1) + ret half %ldexp +} + +declare double @llvm.ldexp.f64.i32(double, i32) #0 +declare float @llvm.ldexp.f32.i32(float, i32) #0 +declare <2 x float> @llvm.ldexp.v2f32.v2i32(<2 x float>, <2 x i32>) #0 +declare <4 x float> @llvm.ldexp.v4f32.v4i32(<4 x float>, <4 x i32>) #0 +declare half @llvm.ldexp.f16.i32(half, i32) #0 + +attributes #0 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } +attributes #1 = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) } Index: llvm/test/MC/AMDGPU/gfx10_asm_vop2.s =================================================================== --- llvm/test/MC/AMDGPU/gfx10_asm_vop2.s +++ llvm/test/MC/AMDGPU/gfx10_asm_vop2.s @@ -12931,11 +12931,11 @@ v_ldexp_f16_e64 v5, v1, -1 // GFX10: encoding: [0x05,0x00,0x3b,0xd5,0x01,0x83,0x01,0x00] -v_ldexp_f16_e64 v5, v1, 0.5 -// GFX10: encoding: [0x05,0x00,0x3b,0xd5,0x01,0xe1,0x01,0x00] +v_ldexp_f16_e64 v5, v1, 0x3800 +// GFX10: encoding: [0x05,0x00,0x3b,0xd5,0x01,0xff,0x01,0x00,0x00,0x38,0x00,0x00] v_ldexp_f16_e64 v5, v1, -4.0 -// GFX10: encoding: [0x05,0x00,0x3b,0xd5,0x01,0xef,0x01,0x00] +// GFX10: encoding: [0x05,0x00,0x3b,0xd5,0x01,0xff,0x01,0x00,0x00,0xc4,0x00,0x00] v_ldexp_f16_e64 v5, -v1, v2 // GFX10: encoding: [0x05,0x00,0x3b,0xd5,0x01,0x05,0x02,0x20] Index: llvm/test/MC/AMDGPU/gfx11_asm_vop3_from_vop2.s =================================================================== --- llvm/test/MC/AMDGPU/gfx11_asm_vop3_from_vop2.s +++ llvm/test/MC/AMDGPU/gfx11_asm_vop3_from_vop2.s @@ -734,7 +734,7 @@ // GFX11: encoding: [0x05,0x00,0x3b,0xd5,0x7b,0xfa,0x01,0x00] v_ldexp_f16_e64 v5, m0, 0.5 -// GFX11: encoding: [0x05,0x00,0x3b,0xd5,0x7d,0xe0,0x01,0x00] +// GFX11: encoding: [0x05,0x00,0x3b,0xd5,0x7d,0xfe,0x01,0x00,0x00,0x38,0x00,0x00] v_ldexp_f16_e64 v5, exec_lo, -1 // GFX11: encoding: [0x05,0x00,0x3b,0xd5,0x7e,0x82,0x01,0x00] 
Index: llvm/test/MC/AMDGPU/gfx8_asm_vop3.s =================================================================== --- llvm/test/MC/AMDGPU/gfx8_asm_vop3.s +++ llvm/test/MC/AMDGPU/gfx8_asm_vop3.s @@ -1,4 +1,5 @@ -// RUN: llvm-mc -arch=amdgcn -mcpu=tonga -show-encoding %s | FileCheck %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga -show-encoding %s | FileCheck %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga -show-encoding -filetype=null 2>&1 %s | FileCheck -check-prefix=ERR --implicit-check-not=error %s v_interp_p1_f32_e64 v5, v2, attr0.x // CHECK: [0x05,0x00,0x70,0xd2,0x00,0x04,0x02,0x00] @@ -12826,10 +12827,10 @@ // CHECK: [0x05,0x00,0x33,0xd1,0x01,0x83,0x01,0x00] v_ldexp_f16_e64 v5, v1, 0.5 -// CHECK: [0x05,0x00,0x33,0xd1,0x01,0xe1,0x01,0x00] +// ERR: [[@LINE-1]]:25: error: literal operands are not supported v_ldexp_f16_e64 v5, v1, -4.0 -// CHECK: [0x05,0x00,0x33,0xd1,0x01,0xef,0x01,0x00] +// ERR: [[@LINE-1]]:25: error: literal operands are not supported v_ldexp_f16_e64 v5, v1, src_vccz // CHECK: [0x05,0x00,0x33,0xd1,0x01,0xf7,0x01,0x00] Index: llvm/test/MC/AMDGPU/gfx9_asm_vop3.s =================================================================== --- llvm/test/MC/AMDGPU/gfx9_asm_vop3.s +++ llvm/test/MC/AMDGPU/gfx9_asm_vop3.s @@ -1,4 +1,5 @@ -// RUN: llvm-mc -arch=amdgcn -mcpu=gfx900 -show-encoding %s | FileCheck %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 -show-encoding %s | FileCheck %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 -show-encoding 2>&1 %s | FileCheck -check-prefix=ERR --implicit-check-not=error %s v_interp_p1_f32_e64 v5, v2, attr0.x // CHECK: [0x05,0x00,0x70,0xd2,0x00,0x04,0x02,0x00] @@ -11239,10 +11240,10 @@ // CHECK: [0x05,0x00,0x33,0xd1,0x01,0x83,0x01,0x00] v_ldexp_f16_e64 v5, v1, 0.5 -// CHECK: [0x05,0x00,0x33,0xd1,0x01,0xe1,0x01,0x00] +// ERR: [[@LINE-1]]:25: error: literal operands are not supported v_ldexp_f16_e64 v5, v1, -4.0 -// CHECK: [0x05,0x00,0x33,0xd1,0x01,0xef,0x01,0x00] +// ERR: [[@LINE-1]]:25: error: literal operands are not supported v_ldexp_f16_e64 v5, v1, src_vccz // CHECK: [0x05,0x00,0x33,0xd1,0x01,0xf7,0x01,0x00] Index: llvm/test/MC/Disassembler/AMDGPU/gfx10_vop3.txt =================================================================== --- llvm/test/MC/Disassembler/AMDGPU/gfx10_vop3.txt +++ llvm/test/MC/Disassembler/AMDGPU/gfx10_vop3.txt @@ -7520,13 +7520,13 @@ # GFX10: v_ldexp_f16_e64 v5, v1, -1 ; encoding: [0x05,0x00,0x3b,0xd5,0x01,0x83,0x01,0x00] 0x05,0x00,0x3b,0xd5,0x01,0x83,0x01,0x00 -# GFX10: v_ldexp_f16_e64 v5, v1, -4.0 ; encoding: [0x05,0x00,0x3b,0xd5,0x01,0xef,0x01,0x00] +# GFX10: v_ldexp_f16_e64 v5, v1, 0xc400 ; encoding: [0x05,0x00,0x3b,0xd5,0x01,0xff,0x01,0x00,0x00,0xc4,0x00,0x00] 0x05,0x00,0x3b,0xd5,0x01,0xef,0x01,0x00 # GFX10: v_ldexp_f16_e64 v5, v1, 0 ; encoding: [0x05,0x00,0x3b,0xd5,0x01,0x01,0x01,0x00] 0x05,0x00,0x3b,0xd5,0x01,0x01,0x01,0x00 -# GFX10: v_ldexp_f16_e64 v5, v1, 0.5 ; encoding: [0x05,0x00,0x3b,0xd5,0x01,0xe1,0x01,0x00] +# GFX10: v_ldexp_f16_e64 v5, v1, 0x3800 ; encoding: [0x05,0x00,0x3b,0xd5,0x01,0xff,0x01,0x00,0x00,0x38,0x00,0x00] 0x05,0x00,0x3b,0xd5,0x01,0xe1,0x01,0x00 # GFX10: v_ldexp_f16_e64 v5, v1, exec_hi ; encoding: [0x05,0x00,0x3b,0xd5,0x01,0xff,0x00,0x00] Index: llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_from_vop2.txt =================================================================== --- llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_from_vop2.txt +++ llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_from_vop2.txt @@ -543,7 +543,7 @@ # GFX11: v_ldexp_f16_e64 v5, ttmp15, src_scc ; encoding: 
[0x05,0x00,0x3b,0xd5,0x7b,0xfa,0x01,0x00] 0x05,0x00,0x3b,0xd5,0x7b,0xfa,0x01,0x00 -# GFX11: v_ldexp_f16_e64 v5, m0, 0.5 ; encoding: [0x05,0x00,0x3b,0xd5,0x7d,0xe0,0x01,0x00] +# GFX11: v_ldexp_f16_e64 v5, m0, 0x3800 ; encoding: [0x05,0x00,0x3b,0xd5,0x7d,0xfe,0x01,0x00,0x00,0x38,0x00,0x00] 0x05,0x00,0x3b,0xd5,0x7d,0xe0,0x01,0x00 # GFX11: v_ldexp_f16_e64 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x3b,0xd5,0x7e,0x82,0x01,0x00] Index: llvm/test/MC/Disassembler/AMDGPU/gfx8_vop3.txt =================================================================== --- llvm/test/MC/Disassembler/AMDGPU/gfx8_vop3.txt +++ llvm/test/MC/Disassembler/AMDGPU/gfx8_vop3.txt @@ -11178,10 +11178,10 @@ # CHECK: v_ldexp_f16_e64 v5, v1, -1 ; encoding: [0x05,0x00,0x33,0xd1,0x01,0x83,0x01,0x00] 0x05,0x00,0x33,0xd1,0x01,0x83,0x01,0x00 -# CHECK: v_ldexp_f16_e64 v5, v1, 0.5 ; encoding: [0x05,0x00,0x33,0xd1,0x01,0xe1,0x01,0x00] +# CHECK: v_ldexp_f16_e64 v5, v1, 0x3800 ; encoding: [0x05,0x00,0x33,0xd1,0x01,0xff,0x01,0x00] 0x05,0x00,0x33,0xd1,0x01,0xe1,0x01,0x00 -# CHECK: v_ldexp_f16_e64 v5, v1, -4.0 ; encoding: [0x05,0x00,0x33,0xd1,0x01,0xef,0x01,0x00] +# CHECK: v_ldexp_f16_e64 v5, v1, 0xc400 ; encoding: [0x05,0x00,0x33,0xd1,0x01,0xff,0x01,0x00] 0x05,0x00,0x33,0xd1,0x01,0xef,0x01,0x00 # CHECK: v_ldexp_f16_e64 v5, -v1, v2 ; encoding: [0x05,0x00,0x33,0xd1,0x01,0x05,0x02,0x20] Index: llvm/test/MC/Disassembler/AMDGPU/gfx9_vop3.txt =================================================================== --- llvm/test/MC/Disassembler/AMDGPU/gfx9_vop3.txt +++ llvm/test/MC/Disassembler/AMDGPU/gfx9_vop3.txt @@ -8814,10 +8814,10 @@ # CHECK: v_ldexp_f16_e64 v5, v1, -1 ; encoding: [0x05,0x00,0x33,0xd1,0x01,0x83,0x01,0x00] 0x05,0x00,0x33,0xd1,0x01,0x83,0x01,0x00 -# CHECK: v_ldexp_f16_e64 v5, v1, 0.5 ; encoding: [0x05,0x00,0x33,0xd1,0x01,0xe1,0x01,0x00] +# CHECK: v_ldexp_f16_e64 v5, v1, 0x3800 ; encoding: [0x05,0x00,0x33,0xd1,0x01,0xff,0x01,0x00] 0x05,0x00,0x33,0xd1,0x01,0xe1,0x01,0x00 -# CHECK: v_ldexp_f16_e64 v5, v1, -4.0 ; encoding: [0x05,0x00,0x33,0xd1,0x01,0xef,0x01,0x00] +# CHECK: v_ldexp_f16_e64 v5, v1, 0xc400 ; encoding: [0x05,0x00,0x33,0xd1,0x01,0xff,0x01,0x00] 0x05,0x00,0x33,0xd1,0x01,0xef,0x01,0x00 # CHECK: v_ldexp_f16_e64 v5, -v1, v2 ; encoding: [0x05,0x00,0x33,0xd1,0x01,0x05,0x02,0x20] Index: llvm/test/Transforms/SpeculativeExecution/spec-calls.ll =================================================================== --- llvm/test/Transforms/SpeculativeExecution/spec-calls.ll +++ llvm/test/Transforms/SpeculativeExecution/spec-calls.ll @@ -302,3 +302,19 @@ } declare float @llvm.vector.reduce.fmin.v2f32(<2 x float>) + +; CHECK-LABEL: @ifThen_ldexp( +; CHECK: %ldexp = call float @llvm.ldexp.f32.i32(float %x, i32 %y) +; CHECK-NEXT: br i1 true +define void @ifThen_ldexp(float %x, i32 %y) { + br i1 true, label %a, label %b + +a: + %ldexp = call float @llvm.ldexp.f32.i32(float %x, i32 %y) + br label %b + +b: + ret void +} + +declare float @llvm.ldexp.f32.i32(float, i32)
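For reference, a minimal usage sketch of the two intrinsics exercised by the tests above. This is not taken from the patch: the function names `scale_by` and `scale_by_strict` are hypothetical, and the f32/i32 overload is just one of the instantiations covered by the declarations in the tests.

; Computes %x * 2^%n in the default floating-point environment.
define float @scale_by(float %x, i32 %n) {
  %r = call float @llvm.ldexp.f32.i32(float %x, i32 %n)
  ret float %r
}

; The same computation under strictfp, with dynamic rounding and strict
; exception behavior; constrained intrinsic calls must appear in a
; function carrying the strictfp attribute.
define float @scale_by_strict(float %x, i32 %n) strictfp {
  %r = call float @llvm.experimental.constrained.ldexp.f32.i32(float %x, i32 %n, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp
  ret float %r
}

declare float @llvm.ldexp.f32.i32(float, i32)
declare float @llvm.experimental.constrained.ldexp.f32.i32(float, i32, metadata, metadata)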