Index: llvm/docs/AMDGPUUsage.rst =================================================================== --- llvm/docs/AMDGPUUsage.rst +++ llvm/docs/AMDGPUUsage.rst @@ -980,6 +980,8 @@ :ref:`llvm.log <int_log>` Implemented for float and half (and vectors). + :ref:`llvm.exp <int_exp>` Implemented for float and half (and vectors). + :ref:`llvm.log10 <int_log10>` Implemented for float and half (and vectors). ========================================= ========================================================== Index: llvm/docs/LangRef.rst =================================================================== --- llvm/docs/LangRef.rst +++ llvm/docs/LangRef.rst @@ -14639,6 +14639,8 @@ When specified with the fast-math-flag 'afn', the result may be approximated using a less accurate calculation. +.. _int_exp: + '``llvm.exp.*``' Intrinsic ^^^^^^^^^^^^^^^^^^^^^^^^^^ Index: llvm/docs/ReleaseNotes.rst =================================================================== --- llvm/docs/ReleaseNotes.rst +++ llvm/docs/ReleaseNotes.rst @@ -143,8 +143,8 @@ accurately. Use llvm.amdgcn.log.f32 to access the old behavior for llvm.log2.f32. -* llvm.exp2.f32 is now lowered accurately. Use llvm.amdgcn.exp2.f32 to - access the old behavior for llvm.exp2.f32. +* llvm.exp2.f32 and llvm.exp.f32 are now lowered accurately. Use + llvm.amdgcn.exp2.f32 to access the old behavior for llvm.exp2.f32. 
Changes to the ARM Backend -------------------------- Index: llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h =================================================================== --- llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h +++ llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h @@ -1858,6 +1858,12 @@ return buildInstr(TargetOpcode::G_FPTOSI, {Dst}, {Src0}); } + /// Build and insert \p Dst = G_FRINT \p Src0, forwarding optional \p Flags + MachineInstrBuilder buildFRint(const DstOp &Dst, const SrcOp &Src0, + std::optional<unsigned> Flags = std::nullopt) { + return buildInstr(TargetOpcode::G_FRINT, {Dst}, {Src0}, Flags); + } + /// Build and insert \p Res = G_SMIN \p Op0, \p Op1 MachineInstrBuilder buildSMin(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1) { Index: llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h +++ llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h @@ -74,6 +74,9 @@ SDValue LowerFLOGUnsafe(SDValue Op, const SDLoc &SL, SelectionDAG &DAG, double Log2BaseInverted, SDNodeFlags Flags) const; SDValue lowerFEXP2(SDValue Op, SelectionDAG &DAG) const; + + SDValue lowerFEXPUnsafe(SDValue Op, const SDLoc &SL, SelectionDAG &DAG, + SDNodeFlags Flags) const; SDValue lowerFEXP(SDValue Op, SelectionDAG &DAG) const; SDValue LowerCTLZ_CTTZ(SDValue Op, SelectionDAG &DAG) const; Index: llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -351,7 +351,7 @@ setOperationAction({ISD::FLOG2, ISD::FEXP2}, MVT::f16, Custom); } - setOperationAction({ISD::FLOG10, ISD::FLOG}, MVT::f16, Custom); + setOperationAction({ISD::FLOG10, ISD::FLOG, ISD::FEXP}, MVT::f16, Custom); // FIXME: These IS_FPCLASS vector fp types are marked custom so it reaches // scalarization code. 
Can be removed when IS_FPCLASS expand isn't called by @@ -1359,6 +1359,10 @@ if (SDValue Lowered = lowerFEXP2(SDValue(N, 0), DAG)) Results.push_back(Lowered); return; + case ISD::FEXP: + if (SDValue Lowered = lowerFEXP(SDValue(N, 0), DAG)) + Results.push_back(Lowered); + return; default: return; } @@ -2460,12 +2464,16 @@ llvm_unreachable("covered opcode switch"); } +static bool allowApproxFunc(const SelectionDAG &DAG, SDNodeFlags Flags) { + if (Flags.hasApproximateFuncs()) + return true; + auto &Options = DAG.getTarget().Options; + return Options.UnsafeFPMath || Options.ApproxFuncFPMath; +} + static bool needsDenormHandlingF32(const SelectionDAG &DAG, SDValue Src, SDNodeFlags Flags) { - return !Flags.hasApproximateFuncs() && - !DAG.getTarget().Options.UnsafeFPMath && - !DAG.getTarget().Options.ApproxFuncFPMath && - !valueIsKnownNeverF32Denorm(Src) && + return !valueIsKnownNeverF32Denorm(Src) && DAG.getMachineFunction() .getDenormalMode(APFloat::IEEEsingle()) .Input != DenormalMode::PreserveSign; @@ -2508,7 +2516,7 @@ std::pair<SDValue, SDValue> AMDGPUTargetLowering::getScaledLogInput(SelectionDAG &DAG, const SDLoc SL, SDValue Src, SDNodeFlags Flags) const { - if (!needsDenormHandlingF32(DAG, Src, Flags)) + if (allowApproxFunc(DAG, Flags) || !needsDenormHandlingF32(DAG, Src, Flags)) return {}; MVT VT = MVT::f32; @@ -2706,7 +2714,9 @@ DAG.getTargetConstant(0, SL, MVT::i32), Flags); } - if (!needsDenormHandlingF32(DAG, Src, Flags)) + assert(VT == MVT::f32); + + if (allowApproxFunc(DAG, Flags) || !needsDenormHandlingF32(DAG, Src, Flags)) return DAG.getNode(AMDGPUISD::EXP, SL, MVT::f32, Src, Flags); // bool needs_scaling = x < -0x1.f80000p+6f; @@ -2715,9 +2725,10 @@ // -nextafter(128.0, -1) SDValue RangeCheckConst = DAG.getConstantFP(-0x1.f80000p+6f, SL, VT); - SDValue NeedsScaling = DAG.getSetCC( - SL, getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT), Src, - RangeCheckConst, ISD::SETOLT); + EVT SetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT); + + 
SDValue NeedsScaling = + DAG.getSetCC(SL, SetCCVT, Src, RangeCheckConst, ISD::SETOLT); SDValue SixtyFour = DAG.getConstantFP(0x1.0p+6f, SL, VT); SDValue Zero = DAG.getConstantFP(0.0, SL, VT); @@ -2736,15 +2747,143 @@ return DAG.getNode(ISD::FMUL, SL, VT, Exp2, ResultScale, Flags); } -// exp2(M_LOG2E_F * f); +SDValue AMDGPUTargetLowering::lowerFEXPUnsafe(SDValue Op, const SDLoc &SL, + SelectionDAG &DAG, + SDNodeFlags Flags) const { + // exp2(M_LOG2E_F * f); + EVT VT = Op.getValueType(); + const SDValue K = DAG.getConstantFP(numbers::log2e, SL, VT); + SDValue Mul = DAG.getNode(ISD::FMUL, SL, VT, Op, K, Flags); + return DAG.getNode(VT == MVT::f32 ? AMDGPUISD::EXP : ISD::FEXP2, SL, VT, Mul, + Flags); +} + SDValue AMDGPUTargetLowering::lowerFEXP(SDValue Op, SelectionDAG &DAG) const { EVT VT = Op.getValueType(); SDLoc SL(Op); - SDValue Src = Op.getOperand(0); + SDValue X = Op.getOperand(0); + SDNodeFlags Flags = Op->getFlags(); + const bool IsExp10 = false; // TODO: For some reason exp10 is missing - const SDValue K = DAG.getConstantFP(numbers::log2e, SL, VT); - SDValue Mul = DAG.getNode(ISD::FMUL, SL, VT, Src, K, Op->getFlags()); - return DAG.getNode(ISD::FEXP2, SL, VT, Mul, Op->getFlags()); + if (VT.getScalarType() == MVT::f16) { + // v_exp_f16 (fmul x, log2e) + if (allowApproxFunc(DAG, Flags)) // TODO: Does this really require fast? + return lowerFEXPUnsafe(X, SL, DAG, Flags); + + if (VT.isVector()) + return SDValue(); + + // exp(f16 x) -> + // fptrunc (v_exp_f32 (fmul (fpext x), log2e)) + + // Nothing in half is a denormal when promoted to f32. + SDValue Ext = DAG.getNode(ISD::FP_EXTEND, SL, MVT::f32, X, Flags); + SDValue Lowered = lowerFEXPUnsafe(Ext, SL, DAG, Flags); + return DAG.getNode(ISD::FP_ROUND, SL, VT, Lowered, + DAG.getTargetConstant(0, SL, MVT::i32), Flags); + } + + assert(VT == MVT::f32); + + // TODO: Interpret allowApproxFunc as ignoring DAZ. This is currently copying + // library behavior. Also, is known-not-daz source sufficient? 
+ if (allowApproxFunc(DAG, Flags) && !needsDenormHandlingF32(DAG, X, Flags)) { + assert(!IsExp10 && "todo exp10 support"); + return lowerFEXPUnsafe(X, SL, DAG, Flags); + } + + // Algorithm: + // + // e^x = 2^(x/ln(2)) = 2^(x*(64/ln(2))/64) + // + // x*(64/ln(2)) = n + f, |f| <= 0.5, n is integer + // n = 64*m + j, 0 <= j < 64 + // + // e^x = 2^((64*m + j + f)/64) + // = (2^m) * (2^(j/64)) * 2^(f/64) + // = (2^m) * (2^(j/64)) * e^(f*(ln(2)/64)) + // + // f = x*(64/ln(2)) - n + // r = f*(ln(2)/64) = x - n*(ln(2)/64) + // + // e^x = (2^m) * (2^(j/64)) * e^r + // + // (2^(j/64)) is precomputed + // + // e^r = 1 + r + (r^2)/2! + (r^3)/3! + (r^4)/4! + (r^5)/5! + // e^r = 1 + q + // + // q = r + (r^2)/2! + (r^3)/3! + (r^4)/4! + (r^5)/5! + // + // e^x = (2^m) * ( (2^(j/64)) + q*(2^(j/64)) ) + + SDValue PH, PL; + if (Subtarget->hasFastFMAF32()) { + const float c_exp = numbers::log2ef; + const float cc_exp = 0x1.4ae0bep-26f; // c+cc are 49 bits + const float c_exp10 = 0x1.a934f0p+1f; + const float cc_exp10 = 0x1.2f346ep-24f; + + SDValue C = DAG.getConstantFP(IsExp10 ? c_exp10 : c_exp, SL, VT); + SDValue CC = DAG.getConstantFP(IsExp10 ? cc_exp10 : cc_exp, SL, VT); + + PH = DAG.getNode(ISD::FMUL, SL, VT, X, C, Flags); + SDValue NegPH = DAG.getNode(ISD::FNEG, SL, VT, PH, Flags); + SDValue FMA0 = DAG.getNode(ISD::FMA, SL, VT, X, C, NegPH, Flags); + PL = DAG.getNode(ISD::FMA, SL, VT, X, CC, FMA0, Flags); + } else { + const float ch_exp = 0x1.714000p+0f; + const float cl_exp = 0x1.47652ap-12f; // ch + cl are 36 bits + + const float ch_exp10 = 0x1.a92000p+1f; + const float cl_exp10 = 0x1.4f0978p-11f; + + SDValue CH = DAG.getConstantFP(IsExp10 ? ch_exp10 : ch_exp, SL, VT); + SDValue CL = DAG.getConstantFP(IsExp10 ? 
cl_exp10 : cl_exp, SL, VT); + + SDValue XAsInt = DAG.getNode(ISD::BITCAST, SL, MVT::i32, X); + SDValue MaskConst = DAG.getConstant(0xfffff000, SL, MVT::i32); + SDValue XHAsInt = DAG.getNode(ISD::AND, SL, MVT::i32, XAsInt, MaskConst); + SDValue XH = DAG.getNode(ISD::BITCAST, SL, VT, XHAsInt); + SDValue XL = DAG.getNode(ISD::FSUB, SL, VT, X, XH, Flags); + + PH = DAG.getNode(ISD::FMUL, SL, VT, XH, CH, Flags); + + SDValue XLCL = DAG.getNode(ISD::FMUL, SL, VT, XL, CL, Flags); + SDValue Mad0 = getMad(DAG, SL, VT, XL, CH, XLCL, Flags); + PL = getMad(DAG, SL, VT, XH, CL, Mad0, Flags); + } + + SDValue E = DAG.getNode(ISD::FRINT, SL, VT, PH, Flags); + SDValue PHSubE = DAG.getNode(ISD::FSUB, SL, VT, PH, E, Flags); + SDValue A = DAG.getNode(ISD::FADD, SL, VT, PHSubE, PL, Flags); + SDValue IntE = DAG.getNode(ISD::FP_TO_SINT, SL, MVT::i32, E); + SDValue Exp2 = DAG.getNode(AMDGPUISD::EXP, SL, VT, A, Flags); + + SDValue R = DAG.getNode(ISD::FLDEXP, SL, VT, Exp2, IntE, Flags); + + SDValue UnderflowCheckConst = + DAG.getConstantFP(IsExp10 ? -0x1.66d3e8p+5f : -0x1.9d1da0p+6f, SL, VT); + + EVT SetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT); + SDValue Zero = DAG.getConstantFP(0.0, SL, VT); + SDValue Underflow = + DAG.getSetCC(SL, SetCCVT, X, UnderflowCheckConst, ISD::SETOLT); + + R = DAG.getNode(ISD::SELECT, SL, VT, Underflow, Zero, R); + const auto &Options = getTargetMachine().Options; + + if (!Flags.hasNoInfs() && !Options.NoInfsFPMath) { + SDValue OverflowCheckConst = + DAG.getConstantFP(IsExp10 ? 
0x1.344136p+5f : 0x1.62e430p+6f, SL, VT); + SDValue Overflow = + DAG.getSetCC(SL, SetCCVT, X, OverflowCheckConst, ISD::SETOGT); + SDValue Inf = + DAG.getConstantFP(APFloat::getInf(APFloat::IEEEsingle()), SL, VT); + R = DAG.getNode(ISD::SELECT, SL, VT, Overflow, Inf, R); + } + + return R; } static bool isCtlzOpc(unsigned Opc) { Index: llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h +++ llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h @@ -87,6 +87,8 @@ bool legalizeFlogUnsafe(MachineIRBuilder &B, Register Dst, Register Src, double Log2BaseInverted, unsigned Flags) const; bool legalizeFExp2(MachineInstr &MI, MachineIRBuilder &B) const; + bool legalizeFExpUnsafe(MachineIRBuilder &B, Register Dst, Register Src, + unsigned Flags) const; bool legalizeFExp(MachineInstr &MI, MachineIRBuilder &B) const; bool legalizeFPow(MachineInstr &MI, MachineIRBuilder &B) const; bool legalizeFFloor(MachineInstr &MI, MachineRegisterInfo &MRI, Index: llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -1110,7 +1110,7 @@ .scalarize(0); // FIXME: fpow has a selection pattern that should move to custom lowering. 
- auto &ExpOps = getActionDefinitionsBuilder({G_FEXP, G_FPOW}); + auto &ExpOps = getActionDefinitionsBuilder(G_FPOW); if (ST.has16BitInsts()) ExpOps.customFor({{S32}, {S16}}); else @@ -1131,7 +1131,7 @@ Log2Ops.scalarize(0) .lower(); - auto &LogOps = getActionDefinitionsBuilder({G_FLOG, G_FLOG10}); + auto &LogOps = getActionDefinitionsBuilder({G_FLOG, G_FLOG10, G_FEXP}); LogOps.customFor({S32, S16}); LogOps.clampScalar(0, MinScalarFPTy, S32) .scalarize(0); @@ -2996,12 +2996,16 @@ return false; } +static bool allowApproxFunc(const MachineFunction &MF, unsigned Flags) { + if (Flags & MachineInstr::FmAfn) + return true; + const auto &Options = MF.getTarget().Options; + return Options.UnsafeFPMath || Options.ApproxFuncFPMath; +} + static bool needsDenormHandlingF32(const MachineFunction &MF, Register Src, unsigned Flags) { - return (Flags & MachineInstr::FmAfn) == 0 && - !MF.getTarget().Options.UnsafeFPMath && - !MF.getTarget().Options.ApproxFuncFPMath && - !valueIsKnownNeverF32Denorm(MF.getRegInfo(), Src) && + return !valueIsKnownNeverF32Denorm(MF.getRegInfo(), Src) && MF.getDenormalMode(APFloat::IEEEsingle()).Input != DenormalMode::PreserveSign; } @@ -3009,7 +3013,8 @@ std::pair<Register, Register> AMDGPULegalizerInfo::getScaledLogInput(MachineIRBuilder &B, Register Src, unsigned Flags) const { - if (!needsDenormHandlingF32(B.getMF(), Src, Flags)) + if (allowApproxFunc(B.getMF(), Flags) || + !needsDenormHandlingF32(B.getMF(), Src, Flags)) return {}; const LLT F32 = LLT::scalar(32); @@ -3235,7 +3240,8 @@ assert(Ty == F32); - if (!needsDenormHandlingF32(B.getMF(), Src, Flags)) { + if (allowApproxFunc(B.getMF(), Flags) || + !needsDenormHandlingF32(B.getMF(), Src, Flags)) { B.buildIntrinsic(Intrinsic::amdgcn_exp2, ArrayRef{Dst}, false) .addUse(Src) .setMIFlags(Flags); @@ -3268,16 +3274,160 @@ return true; } +bool AMDGPULegalizerInfo::legalizeFExpUnsafe(MachineIRBuilder &B, Register Dst, + Register Src, + unsigned Flags) const { + LLT Ty = B.getMRI()->getType(Dst); + auto K = 
B.buildFConstant(Ty, numbers::log2e); + auto Mul = B.buildFMul(Ty, Src, K, Flags); + + if (Ty == LLT::scalar(32)) { + B.buildIntrinsic(Intrinsic::amdgcn_exp2, ArrayRef{Dst}, false) + .addUse(Mul.getReg(0)) + .setMIFlags(Flags); + } else { + B.buildFExp2(Dst, Mul.getReg(0), Flags); + } + + return true; +} + bool AMDGPULegalizerInfo::legalizeFExp(MachineInstr &MI, MachineIRBuilder &B) const { Register Dst = MI.getOperand(0).getReg(); - Register Src = MI.getOperand(1).getReg(); + Register X = MI.getOperand(1).getReg(); unsigned Flags = MI.getFlags(); - LLT Ty = B.getMRI()->getType(Dst); + MachineFunction &MF = B.getMF(); + MachineRegisterInfo &MRI = *B.getMRI(); + LLT Ty = MRI.getType(Dst); + const LLT F16 = LLT::scalar(16); + const LLT F32 = LLT::scalar(32); + const bool IsExp10 = false; // TODO: For some reason exp10 is missing - auto K = B.buildFConstant(Ty, numbers::log2e); - auto Mul = B.buildFMul(Ty, Src, K, Flags); - B.buildFExp2(Dst, Mul, Flags); + if (Ty == F16) { + // v_exp_f16 (fmul x, log2e) + if (allowApproxFunc(MF, Flags)) { + // TODO: Does this really require fast? + legalizeFExpUnsafe(B, Dst, X, Flags); + MI.eraseFromParent(); + return true; + } + + // exp(f16 x) -> + // fptrunc (v_exp_f32 (fmul (fpext x), log2e)) + + // Nothing in half is a denormal when promoted to f32. + auto Ext = B.buildFPExt(F32, X, Flags); + Register Lowered = MRI.createGenericVirtualRegister(F32); + legalizeFExpUnsafe(B, Lowered, Ext.getReg(0), Flags); + B.buildFPTrunc(Dst, Lowered, Flags); + MI.eraseFromParent(); + return true; + } + + assert(Ty == F32); + + // TODO: Interpret allowApproxFunc as ignoring DAZ. This is currently copying + // library behavior. Also, is known-not-daz source sufficient? 
+ if (allowApproxFunc(MF, Flags) && !needsDenormHandlingF32(MF, X, Flags)) { + legalizeFExpUnsafe(B, Dst, X, Flags); + MI.eraseFromParent(); + return true; + } + + // Algorithm: + // + // e^x = 2^(x/ln(2)) = 2^(x*(64/ln(2))/64) + // + // x*(64/ln(2)) = n + f, |f| <= 0.5, n is integer + // n = 64*m + j, 0 <= j < 64 + // + // e^x = 2^((64*m + j + f)/64) + // = (2^m) * (2^(j/64)) * 2^(f/64) + // = (2^m) * (2^(j/64)) * e^(f*(ln(2)/64)) + // + // f = x*(64/ln(2)) - n + // r = f*(ln(2)/64) = x - n*(ln(2)/64) + // + // e^x = (2^m) * (2^(j/64)) * e^r + // + // (2^(j/64)) is precomputed + // + // e^r = 1 + r + (r^2)/2! + (r^3)/3! + (r^4)/4! + (r^5)/5! + // e^r = 1 + q + // + // q = r + (r^2)/2! + (r^3)/3! + (r^4)/4! + (r^5)/5! + // + // e^x = (2^m) * ( (2^(j/64)) + q*(2^(j/64)) ) + + Register PH, PL; + + if (ST.hasFastFMAF32()) { + const float c_exp = numbers::log2ef; + const float cc_exp = 0x1.4ae0bep-26f; // c+cc are 49 bits + const float c_exp10 = 0x1.a934f0p+1f; + const float cc_exp10 = 0x1.2f346ep-24f; + + auto C = B.buildFConstant(Ty, IsExp10 ? c_exp10 : c_exp); + PH = B.buildFMul(Ty, X, C, Flags).getReg(0); + auto NegPH = B.buildFNeg(Ty, PH, Flags); + auto FMA0 = B.buildFMA(Ty, X, C, NegPH, Flags); + + auto CC = B.buildFConstant(Ty, IsExp10 ? cc_exp10 : cc_exp); + PL = B.buildFMA(Ty, X, CC, FMA0, Flags).getReg(0); + } else { + const float ch_exp = 0x1.714000p+0f; + const float cl_exp = 0x1.47652ap-12f; // ch + cl are 36 bits + + const float ch_exp10 = 0x1.a92000p+1f; + const float cl_exp10 = 0x1.4f0978p-11f; + + auto MaskConst = B.buildConstant(Ty, 0xfffff000); + auto XH = B.buildAnd(Ty, X, MaskConst); + auto XL = B.buildFSub(Ty, X, XH, Flags); + + auto CH = B.buildFConstant(Ty, IsExp10 ? ch_exp10 : ch_exp); + PH = B.buildFMul(Ty, XH, CH, Flags).getReg(0); + + auto CL = B.buildFConstant(Ty, IsExp10 ? 
cl_exp10 : cl_exp); + auto XLCL = B.buildFMul(Ty, XL, CL, Flags); + + Register Mad0 = + getMad(B, Ty, XL.getReg(0), CH.getReg(0), XLCL.getReg(0), Flags); + PL = getMad(B, Ty, XH.getReg(0), CL.getReg(0), Mad0, Flags); + } + + auto E = B.buildFRint(Ty, PH, Flags); + auto PHSubE = B.buildFSub(Ty, PH, E, Flags); + auto A = B.buildFAdd(Ty, PHSubE, PL, Flags); + auto IntE = B.buildFPTOSI(LLT::scalar(32), E); + + auto Exp2 = B.buildIntrinsic(Intrinsic::amdgcn_exp2, {Ty}, false) + .addUse(A.getReg(0)) + .setMIFlags(Flags); + auto R = B.buildFLdexp(Ty, Exp2, IntE, Flags); + + auto UnderflowCheckConst = + B.buildFConstant(Ty, IsExp10 ? -0x1.66d3e8p+5f : -0x1.9d1da0p+6f); + auto Zero = B.buildFConstant(Ty, 0.0); + auto Underflow = + B.buildFCmp(CmpInst::FCMP_OLT, LLT::scalar(1), X, UnderflowCheckConst); + + R = B.buildSelect(Ty, Underflow, Zero, R); + + const auto &Options = MF.getTarget().Options; + + if (!(Flags & MachineInstr::FmNoInfs) && !Options.NoInfsFPMath) { + auto OverflowCheckConst = + B.buildFConstant(Ty, IsExp10 ? 
0x1.344136p+5f : 0x1.62e430p+6f); + + auto Overflow = + B.buildFCmp(CmpInst::FCMP_OGT, LLT::scalar(1), X, OverflowCheckConst); + auto Inf = B.buildFConstant(Ty, APFloat::getInf(APFloat::IEEEsingle())); + R = B.buildSelect(Ty, Overflow, Inf, R, Flags); + } + + B.buildCopy(Dst, R); MI.eraseFromParent(); return true; } Index: llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fexp.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fexp.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fexp.mir @@ -17,54 +17,83 @@ ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3FF7154760000000 ; GFX6-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[COPY]], [[C]] - ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float -1.260000e+02 - ; GFX6-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[FMUL]](s32), [[C1]] - ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 6.400000e+01 - ; GFX6-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 - ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[C2]], [[C3]] - ; GFX6-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[SELECT]] + ; GFX6-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[FMUL]] + ; GFX6-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[C]], [[FNEG]] + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3E54AE0BE0000000 + ; GFX6-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[C1]], [[FMA]] + ; GFX6-NEXT: [[FRINT:%[0-9]+]]:_(s32) = G_FRINT [[FMUL]] + ; GFX6-NEXT: [[FSUB:%[0-9]+]]:_(s32) = G_FSUB [[FMUL]], [[FRINT]] + ; GFX6-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FSUB]], [[FMA1]] + ; GFX6-NEXT: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[FRINT]](s32) ; GFX6-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD]](s32) - ; GFX6-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3BF0000000000000 - ; GFX6-NEXT: [[C5:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 - ; 
GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[C4]], [[C5]] - ; GFX6-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[INT]], [[SELECT1]] - ; GFX6-NEXT: $vgpr0 = COPY [[FMUL1]](s32) + ; GFX6-NEXT: [[FLDEXP:%[0-9]+]]:_(s32) = G_FLDEXP [[INT]], [[FPTOSI]](s32) + ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC059D1DA00000000 + ; GFX6-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 + ; GFX6-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[COPY]](s32), [[C2]] + ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[C3]], [[FLDEXP]] + ; GFX6-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x40562E4300000000 + ; GFX6-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(ogt), [[COPY]](s32), [[C4]] + ; GFX6-NEXT: [[C5:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x7FF0000000000000 + ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[FCMP1]](s1), [[C5]], [[SELECT]] + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[SELECT1]](s32) + ; GFX6-NEXT: $vgpr0 = COPY [[COPY1]](s32) ; GFX8-LABEL: name: test_fexp_s32 ; GFX8: liveins: $vgpr0 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3FF7154760000000 - ; GFX8-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[COPY]], [[C]] - ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float -1.260000e+02 - ; GFX8-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[FMUL]](s32), [[C1]] - ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 6.400000e+01 - ; GFX8-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 - ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[C2]], [[C3]] - ; GFX8-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[SELECT]] - ; GFX8-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD]](s32) - ; GFX8-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3BF0000000000000 - ; GFX8-NEXT: [[C5:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 - ; GFX8-NEXT: 
[[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[C4]], [[C5]] - ; GFX8-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[INT]], [[SELECT1]] - ; GFX8-NEXT: $vgpr0 = COPY [[FMUL1]](s32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -4096 + ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] + ; GFX8-NEXT: [[FSUB:%[0-9]+]]:_(s32) = G_FSUB [[COPY]], [[AND]] + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3FF7140000000000 + ; GFX8-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AND]], [[C1]] + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3F347652A0000000 + ; GFX8-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[FSUB]], [[C2]] + ; GFX8-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[FSUB]], [[C1]] + ; GFX8-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL2]], [[FMUL1]] + ; GFX8-NEXT: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[AND]], [[C2]] + ; GFX8-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[FADD]] + ; GFX8-NEXT: [[FRINT:%[0-9]+]]:_(s32) = G_FRINT [[FMUL]] + ; GFX8-NEXT: [[FSUB1:%[0-9]+]]:_(s32) = G_FSUB [[FMUL]], [[FRINT]] + ; GFX8-NEXT: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FSUB1]], [[FADD1]] + ; GFX8-NEXT: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[FRINT]](s32) + ; GFX8-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD2]](s32) + ; GFX8-NEXT: [[FLDEXP:%[0-9]+]]:_(s32) = G_FLDEXP [[INT]], [[FPTOSI]](s32) + ; GFX8-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC059D1DA00000000 + ; GFX8-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 + ; GFX8-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[COPY]](s32), [[C3]] + ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[C4]], [[FLDEXP]] + ; GFX8-NEXT: [[C5:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x40562E4300000000 + ; GFX8-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(ogt), [[COPY]](s32), [[C5]] + ; GFX8-NEXT: [[C6:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x7FF0000000000000 + ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[FCMP1]](s1), [[C6]], [[SELECT]] 
+ ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[SELECT1]](s32) + ; GFX8-NEXT: $vgpr0 = COPY [[COPY1]](s32) ; GFX9-LABEL: name: test_fexp_s32 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3FF7154760000000 ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[COPY]], [[C]] - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float -1.260000e+02 - ; GFX9-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[FMUL]](s32), [[C1]] - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 6.400000e+01 - ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 - ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[C2]], [[C3]] - ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[SELECT]] + ; GFX9-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[FMUL]] + ; GFX9-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[C]], [[FNEG]] + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3E54AE0BE0000000 + ; GFX9-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[C1]], [[FMA]] + ; GFX9-NEXT: [[FRINT:%[0-9]+]]:_(s32) = G_FRINT [[FMUL]] + ; GFX9-NEXT: [[FSUB:%[0-9]+]]:_(s32) = G_FSUB [[FMUL]], [[FRINT]] + ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FSUB]], [[FMA1]] + ; GFX9-NEXT: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[FRINT]](s32) ; GFX9-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD]](s32) - ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3BF0000000000000 - ; GFX9-NEXT: [[C5:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 - ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[C4]], [[C5]] - ; GFX9-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[INT]], [[SELECT1]] - ; GFX9-NEXT: $vgpr0 = COPY [[FMUL1]](s32) + ; GFX9-NEXT: [[FLDEXP:%[0-9]+]]:_(s32) = G_FLDEXP [[INT]], [[FPTOSI]](s32) + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC059D1DA00000000 + ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT 
float 0.000000e+00 + ; GFX9-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[COPY]](s32), [[C2]] + ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[C3]], [[FLDEXP]] + ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x40562E4300000000 + ; GFX9-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(ogt), [[COPY]](s32), [[C4]] + ; GFX9-NEXT: [[C5:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x7FF0000000000000 + ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[FCMP1]](s1), [[C5]], [[SELECT]] + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[SELECT1]](s32) + ; GFX9-NEXT: $vgpr0 = COPY [[COPY1]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = G_FEXP %0 $vgpr0 = COPY %1 @@ -82,54 +111,83 @@ ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3FF7154760000000 ; GFX6-NEXT: [[FMUL:%[0-9]+]]:_(s32) = nnan G_FMUL [[COPY]], [[C]] - ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float -1.260000e+02 - ; GFX6-NEXT: [[FCMP:%[0-9]+]]:_(s1) = nnan G_FCMP floatpred(olt), [[FMUL]](s32), [[C1]] - ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 6.400000e+01 - ; GFX6-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 - ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s32) = nnan G_SELECT [[FCMP]](s1), [[C2]], [[C3]] - ; GFX6-NEXT: [[FADD:%[0-9]+]]:_(s32) = nnan G_FADD [[FMUL]], [[SELECT]] + ; GFX6-NEXT: [[FNEG:%[0-9]+]]:_(s32) = nnan G_FNEG [[FMUL]] + ; GFX6-NEXT: [[FMA:%[0-9]+]]:_(s32) = nnan G_FMA [[COPY]], [[C]], [[FNEG]] + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3E54AE0BE0000000 + ; GFX6-NEXT: [[FMA1:%[0-9]+]]:_(s32) = nnan G_FMA [[COPY]], [[C1]], [[FMA]] + ; GFX6-NEXT: [[FRINT:%[0-9]+]]:_(s32) = nnan G_FRINT [[FMUL]] + ; GFX6-NEXT: [[FSUB:%[0-9]+]]:_(s32) = nnan G_FSUB [[FMUL]], [[FRINT]] + ; GFX6-NEXT: [[FADD:%[0-9]+]]:_(s32) = nnan G_FADD [[FSUB]], [[FMA1]] + ; GFX6-NEXT: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[FRINT]](s32) ; GFX6-NEXT: [[INT:%[0-9]+]]:_(s32) = nnan G_INTRINSIC 
intrinsic(@llvm.amdgcn.exp2), [[FADD]](s32) - ; GFX6-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3BF0000000000000 - ; GFX6-NEXT: [[C5:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 - ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = nnan G_SELECT [[FCMP]](s1), [[C4]], [[C5]] - ; GFX6-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = nnan G_FMUL [[INT]], [[SELECT1]] - ; GFX6-NEXT: $vgpr0 = COPY [[FMUL1]](s32) + ; GFX6-NEXT: [[FLDEXP:%[0-9]+]]:_(s32) = nnan G_FLDEXP [[INT]], [[FPTOSI]](s32) + ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC059D1DA00000000 + ; GFX6-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 + ; GFX6-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[COPY]](s32), [[C2]] + ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[C3]], [[FLDEXP]] + ; GFX6-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x40562E4300000000 + ; GFX6-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(ogt), [[COPY]](s32), [[C4]] + ; GFX6-NEXT: [[C5:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x7FF0000000000000 + ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = nnan G_SELECT [[FCMP1]](s1), [[C5]], [[SELECT]] + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[SELECT1]](s32) + ; GFX6-NEXT: $vgpr0 = COPY [[COPY1]](s32) ; GFX8-LABEL: name: test_fexp_s32_nnan ; GFX8: liveins: $vgpr0 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3FF7154760000000 - ; GFX8-NEXT: [[FMUL:%[0-9]+]]:_(s32) = nnan G_FMUL [[COPY]], [[C]] - ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float -1.260000e+02 - ; GFX8-NEXT: [[FCMP:%[0-9]+]]:_(s1) = nnan G_FCMP floatpred(olt), [[FMUL]](s32), [[C1]] - ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 6.400000e+01 - ; GFX8-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 - ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s32) = nnan G_SELECT [[FCMP]](s1), [[C2]], [[C3]] - ; GFX8-NEXT: [[FADD:%[0-9]+]]:_(s32) = nnan G_FADD [[FMUL]], [[SELECT]] - ; GFX8-NEXT: 
[[INT:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD]](s32) - ; GFX8-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3BF0000000000000 - ; GFX8-NEXT: [[C5:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 - ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = nnan G_SELECT [[FCMP]](s1), [[C4]], [[C5]] - ; GFX8-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = nnan G_FMUL [[INT]], [[SELECT1]] - ; GFX8-NEXT: $vgpr0 = COPY [[FMUL1]](s32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -4096 + ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] + ; GFX8-NEXT: [[FSUB:%[0-9]+]]:_(s32) = nnan G_FSUB [[COPY]], [[AND]] + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3FF7140000000000 + ; GFX8-NEXT: [[FMUL:%[0-9]+]]:_(s32) = nnan G_FMUL [[AND]], [[C1]] + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3F347652A0000000 + ; GFX8-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = nnan G_FMUL [[FSUB]], [[C2]] + ; GFX8-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = nnan G_FMUL [[FSUB]], [[C1]] + ; GFX8-NEXT: [[FADD:%[0-9]+]]:_(s32) = nnan G_FADD [[FMUL2]], [[FMUL1]] + ; GFX8-NEXT: [[FMUL3:%[0-9]+]]:_(s32) = nnan G_FMUL [[AND]], [[C2]] + ; GFX8-NEXT: [[FADD1:%[0-9]+]]:_(s32) = nnan G_FADD [[FMUL3]], [[FADD]] + ; GFX8-NEXT: [[FRINT:%[0-9]+]]:_(s32) = nnan G_FRINT [[FMUL]] + ; GFX8-NEXT: [[FSUB1:%[0-9]+]]:_(s32) = nnan G_FSUB [[FMUL]], [[FRINT]] + ; GFX8-NEXT: [[FADD2:%[0-9]+]]:_(s32) = nnan G_FADD [[FSUB1]], [[FADD1]] + ; GFX8-NEXT: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[FRINT]](s32) + ; GFX8-NEXT: [[INT:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD2]](s32) + ; GFX8-NEXT: [[FLDEXP:%[0-9]+]]:_(s32) = nnan G_FLDEXP [[INT]], [[FPTOSI]](s32) + ; GFX8-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC059D1DA00000000 + ; GFX8-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 + ; GFX8-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[COPY]](s32), [[C3]] + ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[C4]], [[FLDEXP]] + ; 
GFX8-NEXT: [[C5:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x40562E4300000000 + ; GFX8-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(ogt), [[COPY]](s32), [[C5]] + ; GFX8-NEXT: [[C6:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x7FF0000000000000 + ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = nnan G_SELECT [[FCMP1]](s1), [[C6]], [[SELECT]] + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[SELECT1]](s32) + ; GFX8-NEXT: $vgpr0 = COPY [[COPY1]](s32) ; GFX9-LABEL: name: test_fexp_s32_nnan ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3FF7154760000000 ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(s32) = nnan G_FMUL [[COPY]], [[C]] - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float -1.260000e+02 - ; GFX9-NEXT: [[FCMP:%[0-9]+]]:_(s1) = nnan G_FCMP floatpred(olt), [[FMUL]](s32), [[C1]] - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 6.400000e+01 - ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 - ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s32) = nnan G_SELECT [[FCMP]](s1), [[C2]], [[C3]] - ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(s32) = nnan G_FADD [[FMUL]], [[SELECT]] + ; GFX9-NEXT: [[FNEG:%[0-9]+]]:_(s32) = nnan G_FNEG [[FMUL]] + ; GFX9-NEXT: [[FMA:%[0-9]+]]:_(s32) = nnan G_FMA [[COPY]], [[C]], [[FNEG]] + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3E54AE0BE0000000 + ; GFX9-NEXT: [[FMA1:%[0-9]+]]:_(s32) = nnan G_FMA [[COPY]], [[C1]], [[FMA]] + ; GFX9-NEXT: [[FRINT:%[0-9]+]]:_(s32) = nnan G_FRINT [[FMUL]] + ; GFX9-NEXT: [[FSUB:%[0-9]+]]:_(s32) = nnan G_FSUB [[FMUL]], [[FRINT]] + ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(s32) = nnan G_FADD [[FSUB]], [[FMA1]] + ; GFX9-NEXT: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[FRINT]](s32) ; GFX9-NEXT: [[INT:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD]](s32) - ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3BF0000000000000 - ; GFX9-NEXT: [[C5:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 - ; GFX9-NEXT: 
[[SELECT1:%[0-9]+]]:_(s32) = nnan G_SELECT [[FCMP]](s1), [[C4]], [[C5]] - ; GFX9-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = nnan G_FMUL [[INT]], [[SELECT1]] - ; GFX9-NEXT: $vgpr0 = COPY [[FMUL1]](s32) + ; GFX9-NEXT: [[FLDEXP:%[0-9]+]]:_(s32) = nnan G_FLDEXP [[INT]], [[FPTOSI]](s32) + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC059D1DA00000000 + ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 + ; GFX9-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[COPY]](s32), [[C2]] + ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[C3]], [[FLDEXP]] + ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x40562E4300000000 + ; GFX9-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(ogt), [[COPY]](s32), [[C4]] + ; GFX9-NEXT: [[C5:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x7FF0000000000000 + ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = nnan G_SELECT [[FCMP1]](s1), [[C5]], [[SELECT]] + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[SELECT1]](s32) + ; GFX9-NEXT: $vgpr0 = COPY [[COPY1]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = nnan G_FEXP %0 $vgpr0 = COPY %1 @@ -148,52 +206,93 @@ ; GFX6-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3FF7154760000000 ; GFX6-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UV]], [[C]] - ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float -1.260000e+02 - ; GFX6-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[FMUL]](s32), [[C1]] - ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 6.400000e+01 - ; GFX6-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 - ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[C2]], [[C3]] - ; GFX6-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[SELECT]] + ; GFX6-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[FMUL]] + ; GFX6-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[UV]], [[C]], [[FNEG]] + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3E54AE0BE0000000 + 
; GFX6-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[UV]], [[C1]], [[FMA]] + ; GFX6-NEXT: [[FRINT:%[0-9]+]]:_(s32) = G_FRINT [[FMUL]] + ; GFX6-NEXT: [[FSUB:%[0-9]+]]:_(s32) = G_FSUB [[FMUL]], [[FRINT]] + ; GFX6-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FSUB]], [[FMA1]] + ; GFX6-NEXT: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[FRINT]](s32) ; GFX6-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD]](s32) - ; GFX6-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3BF0000000000000 - ; GFX6-NEXT: [[C5:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 - ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[C4]], [[C5]] - ; GFX6-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[INT]], [[SELECT1]] - ; GFX6-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[UV1]], [[C]] - ; GFX6-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[FMUL2]](s32), [[C1]] - ; GFX6-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[FCMP1]](s1), [[C2]], [[C3]] - ; GFX6-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL2]], [[SELECT2]] + ; GFX6-NEXT: [[FLDEXP:%[0-9]+]]:_(s32) = G_FLDEXP [[INT]], [[FPTOSI]](s32) + ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC059D1DA00000000 + ; GFX6-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 + ; GFX6-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[UV]](s32), [[C2]] + ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[C3]], [[FLDEXP]] + ; GFX6-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x40562E4300000000 + ; GFX6-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(ogt), [[UV]](s32), [[C4]] + ; GFX6-NEXT: [[C5:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x7FF0000000000000 + ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[FCMP1]](s1), [[C5]], [[SELECT]] + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[SELECT1]](s32) + ; GFX6-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[UV1]], [[C]] + ; GFX6-NEXT: [[FNEG1:%[0-9]+]]:_(s32) = G_FNEG [[FMUL1]] + ; GFX6-NEXT: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[UV1]], [[C]], 
[[FNEG1]] + ; GFX6-NEXT: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[UV1]], [[C1]], [[FMA2]] + ; GFX6-NEXT: [[FRINT1:%[0-9]+]]:_(s32) = G_FRINT [[FMUL1]] + ; GFX6-NEXT: [[FSUB1:%[0-9]+]]:_(s32) = G_FSUB [[FMUL1]], [[FRINT1]] + ; GFX6-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FSUB1]], [[FMA3]] + ; GFX6-NEXT: [[FPTOSI1:%[0-9]+]]:_(s32) = G_FPTOSI [[FRINT1]](s32) ; GFX6-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD1]](s32) - ; GFX6-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[FCMP1]](s1), [[C4]], [[C5]] - ; GFX6-NEXT: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[INT1]], [[SELECT3]] - ; GFX6-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FMUL1]](s32), [[FMUL3]](s32) + ; GFX6-NEXT: [[FLDEXP1:%[0-9]+]]:_(s32) = G_FLDEXP [[INT1]], [[FPTOSI1]](s32) + ; GFX6-NEXT: [[FCMP2:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[UV1]](s32), [[C2]] + ; GFX6-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[FCMP2]](s1), [[C3]], [[FLDEXP1]] + ; GFX6-NEXT: [[FCMP3:%[0-9]+]]:_(s1) = G_FCMP floatpred(ogt), [[UV1]](s32), [[C4]] + ; GFX6-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[FCMP3]](s1), [[C5]], [[SELECT2]] + ; GFX6-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[SELECT3]](s32) + ; GFX6-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32) ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; GFX8-LABEL: name: test_fexp_v2s32 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 ; GFX8-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3FF7154760000000 - ; GFX8-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UV]], [[C]] - ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float -1.260000e+02 - ; GFX8-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[FMUL]](s32), [[C1]] - ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 6.400000e+01 - ; GFX8-NEXT: 
[[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 - ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[C2]], [[C3]] - ; GFX8-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[SELECT]] - ; GFX8-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD]](s32) - ; GFX8-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3BF0000000000000 - ; GFX8-NEXT: [[C5:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 - ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[C4]], [[C5]] - ; GFX8-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[INT]], [[SELECT1]] - ; GFX8-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[UV1]], [[C]] - ; GFX8-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[FMUL2]](s32), [[C1]] - ; GFX8-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[FCMP1]](s1), [[C2]], [[C3]] - ; GFX8-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL2]], [[SELECT2]] - ; GFX8-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD1]](s32) - ; GFX8-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[FCMP1]](s1), [[C4]], [[C5]] - ; GFX8-NEXT: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[INT1]], [[SELECT3]] - ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FMUL1]](s32), [[FMUL3]](s32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -4096 + ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]] + ; GFX8-NEXT: [[FSUB:%[0-9]+]]:_(s32) = G_FSUB [[UV]], [[AND]] + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3FF7140000000000 + ; GFX8-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AND]], [[C1]] + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3F347652A0000000 + ; GFX8-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[FSUB]], [[C2]] + ; GFX8-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[FSUB]], [[C1]] + ; GFX8-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL2]], [[FMUL1]] + ; GFX8-NEXT: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[AND]], [[C2]] + ; GFX8-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[FADD]] + ; 
GFX8-NEXT: [[FRINT:%[0-9]+]]:_(s32) = G_FRINT [[FMUL]] + ; GFX8-NEXT: [[FSUB1:%[0-9]+]]:_(s32) = G_FSUB [[FMUL]], [[FRINT]] + ; GFX8-NEXT: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FSUB1]], [[FADD1]] + ; GFX8-NEXT: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[FRINT]](s32) + ; GFX8-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD2]](s32) + ; GFX8-NEXT: [[FLDEXP:%[0-9]+]]:_(s32) = G_FLDEXP [[INT]], [[FPTOSI]](s32) + ; GFX8-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC059D1DA00000000 + ; GFX8-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 + ; GFX8-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[UV]](s32), [[C3]] + ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[C4]], [[FLDEXP]] + ; GFX8-NEXT: [[C5:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x40562E4300000000 + ; GFX8-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(ogt), [[UV]](s32), [[C5]] + ; GFX8-NEXT: [[C6:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x7FF0000000000000 + ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[FCMP1]](s1), [[C6]], [[SELECT]] + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[SELECT1]](s32) + ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C]] + ; GFX8-NEXT: [[FSUB2:%[0-9]+]]:_(s32) = G_FSUB [[UV1]], [[AND1]] + ; GFX8-NEXT: [[FMUL4:%[0-9]+]]:_(s32) = G_FMUL [[AND1]], [[C1]] + ; GFX8-NEXT: [[FMUL5:%[0-9]+]]:_(s32) = G_FMUL [[FSUB2]], [[C2]] + ; GFX8-NEXT: [[FMUL6:%[0-9]+]]:_(s32) = G_FMUL [[FSUB2]], [[C1]] + ; GFX8-NEXT: [[FADD3:%[0-9]+]]:_(s32) = G_FADD [[FMUL6]], [[FMUL5]] + ; GFX8-NEXT: [[FMUL7:%[0-9]+]]:_(s32) = G_FMUL [[AND1]], [[C2]] + ; GFX8-NEXT: [[FADD4:%[0-9]+]]:_(s32) = G_FADD [[FMUL7]], [[FADD3]] + ; GFX8-NEXT: [[FRINT1:%[0-9]+]]:_(s32) = G_FRINT [[FMUL4]] + ; GFX8-NEXT: [[FSUB3:%[0-9]+]]:_(s32) = G_FSUB [[FMUL4]], [[FRINT1]] + ; GFX8-NEXT: [[FADD5:%[0-9]+]]:_(s32) = G_FADD [[FSUB3]], [[FADD4]] + ; GFX8-NEXT: [[FPTOSI1:%[0-9]+]]:_(s32) = G_FPTOSI [[FRINT1]](s32) + ; GFX8-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC 
intrinsic(@llvm.amdgcn.exp2), [[FADD5]](s32) + ; GFX8-NEXT: [[FLDEXP1:%[0-9]+]]:_(s32) = G_FLDEXP [[INT1]], [[FPTOSI1]](s32) + ; GFX8-NEXT: [[FCMP2:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[UV1]](s32), [[C3]] + ; GFX8-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[FCMP2]](s1), [[C4]], [[FLDEXP1]] + ; GFX8-NEXT: [[FCMP3:%[0-9]+]]:_(s1) = G_FCMP floatpred(ogt), [[UV1]](s32), [[C5]] + ; GFX8-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[FCMP3]](s1), [[C6]], [[SELECT2]] + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[SELECT3]](s32) + ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32) ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; GFX9-LABEL: name: test_fexp_v2s32 ; GFX9: liveins: $vgpr0_vgpr1 @@ -202,25 +301,41 @@ ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3FF7154760000000 ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UV]], [[C]] - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float -1.260000e+02 - ; GFX9-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[FMUL]](s32), [[C1]] - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 6.400000e+01 - ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 - ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[C2]], [[C3]] - ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[SELECT]] + ; GFX9-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[FMUL]] + ; GFX9-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[UV]], [[C]], [[FNEG]] + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3E54AE0BE0000000 + ; GFX9-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[UV]], [[C1]], [[FMA]] + ; GFX9-NEXT: [[FRINT:%[0-9]+]]:_(s32) = G_FRINT [[FMUL]] + ; GFX9-NEXT: [[FSUB:%[0-9]+]]:_(s32) = G_FSUB [[FMUL]], [[FRINT]] + ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FSUB]], [[FMA1]] + ; GFX9-NEXT: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[FRINT]](s32) ; 
GFX9-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD]](s32) - ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3BF0000000000000 - ; GFX9-NEXT: [[C5:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 - ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[C4]], [[C5]] - ; GFX9-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[INT]], [[SELECT1]] - ; GFX9-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[UV1]], [[C]] - ; GFX9-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[FMUL2]](s32), [[C1]] - ; GFX9-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[FCMP1]](s1), [[C2]], [[C3]] - ; GFX9-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL2]], [[SELECT2]] + ; GFX9-NEXT: [[FLDEXP:%[0-9]+]]:_(s32) = G_FLDEXP [[INT]], [[FPTOSI]](s32) + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC059D1DA00000000 + ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 + ; GFX9-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[UV]](s32), [[C2]] + ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[C3]], [[FLDEXP]] + ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x40562E4300000000 + ; GFX9-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(ogt), [[UV]](s32), [[C4]] + ; GFX9-NEXT: [[C5:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x7FF0000000000000 + ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[FCMP1]](s1), [[C5]], [[SELECT]] + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[SELECT1]](s32) + ; GFX9-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[UV1]], [[C]] + ; GFX9-NEXT: [[FNEG1:%[0-9]+]]:_(s32) = G_FNEG [[FMUL1]] + ; GFX9-NEXT: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[UV1]], [[C]], [[FNEG1]] + ; GFX9-NEXT: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[UV1]], [[C1]], [[FMA2]] + ; GFX9-NEXT: [[FRINT1:%[0-9]+]]:_(s32) = G_FRINT [[FMUL1]] + ; GFX9-NEXT: [[FSUB1:%[0-9]+]]:_(s32) = G_FSUB [[FMUL1]], [[FRINT1]] + ; GFX9-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FSUB1]], [[FMA3]] + ; GFX9-NEXT: [[FPTOSI1:%[0-9]+]]:_(s32) = G_FPTOSI 
[[FRINT1]](s32) ; GFX9-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD1]](s32) - ; GFX9-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[FCMP1]](s1), [[C4]], [[C5]] - ; GFX9-NEXT: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[INT1]], [[SELECT3]] - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FMUL1]](s32), [[FMUL3]](s32) + ; GFX9-NEXT: [[FLDEXP1:%[0-9]+]]:_(s32) = G_FLDEXP [[INT1]], [[FPTOSI1]](s32) + ; GFX9-NEXT: [[FCMP2:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[UV1]](s32), [[C2]] + ; GFX9-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[FCMP2]](s1), [[C3]], [[FLDEXP1]] + ; GFX9-NEXT: [[FCMP3:%[0-9]+]]:_(s1) = G_FCMP floatpred(ogt), [[UV1]](s32), [[C4]] + ; GFX9-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[FCMP3]](s1), [[C5]], [[SELECT2]] + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[SELECT3]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 %1:_(<2 x s32>) = G_FEXP %0 @@ -240,66 +355,127 @@ ; GFX6-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3FF7154760000000 ; GFX6-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UV]], [[C]] - ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float -1.260000e+02 - ; GFX6-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[FMUL]](s32), [[C1]] - ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 6.400000e+01 - ; GFX6-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 - ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[C2]], [[C3]] - ; GFX6-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[SELECT]] + ; GFX6-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[FMUL]] + ; GFX6-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[UV]], [[C]], [[FNEG]] + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 
0x3E54AE0BE0000000 + ; GFX6-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[UV]], [[C1]], [[FMA]] + ; GFX6-NEXT: [[FRINT:%[0-9]+]]:_(s32) = G_FRINT [[FMUL]] + ; GFX6-NEXT: [[FSUB:%[0-9]+]]:_(s32) = G_FSUB [[FMUL]], [[FRINT]] + ; GFX6-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FSUB]], [[FMA1]] + ; GFX6-NEXT: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[FRINT]](s32) ; GFX6-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD]](s32) - ; GFX6-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3BF0000000000000 - ; GFX6-NEXT: [[C5:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 - ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[C4]], [[C5]] - ; GFX6-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[INT]], [[SELECT1]] - ; GFX6-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[UV1]], [[C]] - ; GFX6-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[FMUL2]](s32), [[C1]] - ; GFX6-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[FCMP1]](s1), [[C2]], [[C3]] - ; GFX6-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL2]], [[SELECT2]] + ; GFX6-NEXT: [[FLDEXP:%[0-9]+]]:_(s32) = G_FLDEXP [[INT]], [[FPTOSI]](s32) + ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC059D1DA00000000 + ; GFX6-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 + ; GFX6-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[UV]](s32), [[C2]] + ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[C3]], [[FLDEXP]] + ; GFX6-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x40562E4300000000 + ; GFX6-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(ogt), [[UV]](s32), [[C4]] + ; GFX6-NEXT: [[C5:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x7FF0000000000000 + ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[FCMP1]](s1), [[C5]], [[SELECT]] + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[SELECT1]](s32) + ; GFX6-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[UV1]], [[C]] + ; GFX6-NEXT: [[FNEG1:%[0-9]+]]:_(s32) = G_FNEG [[FMUL1]] + ; GFX6-NEXT: [[FMA2:%[0-9]+]]:_(s32) = G_FMA 
[[UV1]], [[C]], [[FNEG1]] + ; GFX6-NEXT: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[UV1]], [[C1]], [[FMA2]] + ; GFX6-NEXT: [[FRINT1:%[0-9]+]]:_(s32) = G_FRINT [[FMUL1]] + ; GFX6-NEXT: [[FSUB1:%[0-9]+]]:_(s32) = G_FSUB [[FMUL1]], [[FRINT1]] + ; GFX6-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FSUB1]], [[FMA3]] + ; GFX6-NEXT: [[FPTOSI1:%[0-9]+]]:_(s32) = G_FPTOSI [[FRINT1]](s32) ; GFX6-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD1]](s32) - ; GFX6-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[FCMP1]](s1), [[C4]], [[C5]] - ; GFX6-NEXT: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[INT1]], [[SELECT3]] - ; GFX6-NEXT: [[FMUL4:%[0-9]+]]:_(s32) = G_FMUL [[UV2]], [[C]] - ; GFX6-NEXT: [[FCMP2:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[FMUL4]](s32), [[C1]] - ; GFX6-NEXT: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[FCMP2]](s1), [[C2]], [[C3]] - ; GFX6-NEXT: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FMUL4]], [[SELECT4]] + ; GFX6-NEXT: [[FLDEXP1:%[0-9]+]]:_(s32) = G_FLDEXP [[INT1]], [[FPTOSI1]](s32) + ; GFX6-NEXT: [[FCMP2:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[UV1]](s32), [[C2]] + ; GFX6-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[FCMP2]](s1), [[C3]], [[FLDEXP1]] + ; GFX6-NEXT: [[FCMP3:%[0-9]+]]:_(s1) = G_FCMP floatpred(ogt), [[UV1]](s32), [[C4]] + ; GFX6-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[FCMP3]](s1), [[C5]], [[SELECT2]] + ; GFX6-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[SELECT3]](s32) + ; GFX6-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[UV2]], [[C]] + ; GFX6-NEXT: [[FNEG2:%[0-9]+]]:_(s32) = G_FNEG [[FMUL2]] + ; GFX6-NEXT: [[FMA4:%[0-9]+]]:_(s32) = G_FMA [[UV2]], [[C]], [[FNEG2]] + ; GFX6-NEXT: [[FMA5:%[0-9]+]]:_(s32) = G_FMA [[UV2]], [[C1]], [[FMA4]] + ; GFX6-NEXT: [[FRINT2:%[0-9]+]]:_(s32) = G_FRINT [[FMUL2]] + ; GFX6-NEXT: [[FSUB2:%[0-9]+]]:_(s32) = G_FSUB [[FMUL2]], [[FRINT2]] + ; GFX6-NEXT: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FSUB2]], [[FMA5]] + ; GFX6-NEXT: [[FPTOSI2:%[0-9]+]]:_(s32) = G_FPTOSI [[FRINT2]](s32) ; GFX6-NEXT: [[INT2:%[0-9]+]]:_(s32) = 
G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD2]](s32) - ; GFX6-NEXT: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[FCMP2]](s1), [[C4]], [[C5]] - ; GFX6-NEXT: [[FMUL5:%[0-9]+]]:_(s32) = G_FMUL [[INT2]], [[SELECT5]] - ; GFX6-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[FMUL1]](s32), [[FMUL3]](s32), [[FMUL5]](s32) + ; GFX6-NEXT: [[FLDEXP2:%[0-9]+]]:_(s32) = G_FLDEXP [[INT2]], [[FPTOSI2]](s32) + ; GFX6-NEXT: [[FCMP4:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[UV2]](s32), [[C2]] + ; GFX6-NEXT: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[FCMP4]](s1), [[C3]], [[FLDEXP2]] + ; GFX6-NEXT: [[FCMP5:%[0-9]+]]:_(s1) = G_FCMP floatpred(ogt), [[UV2]](s32), [[C4]] + ; GFX6-NEXT: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[FCMP5]](s1), [[C5]], [[SELECT4]] + ; GFX6-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[SELECT5]](s32) + ; GFX6-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) ; GFX8-LABEL: name: test_fexp_v3s32 ; GFX8: liveins: $vgpr0_vgpr1_vgpr2 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 ; GFX8-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3FF7154760000000 - ; GFX8-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UV]], [[C]] - ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float -1.260000e+02 - ; GFX8-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[FMUL]](s32), [[C1]] - ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 6.400000e+01 - ; GFX8-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 - ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[C2]], [[C3]] - ; GFX8-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[SELECT]] - ; GFX8-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD]](s32) - ; GFX8-NEXT: 
[[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3BF0000000000000 - ; GFX8-NEXT: [[C5:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 - ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[C4]], [[C5]] - ; GFX8-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[INT]], [[SELECT1]] - ; GFX8-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[UV1]], [[C]] - ; GFX8-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[FMUL2]](s32), [[C1]] - ; GFX8-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[FCMP1]](s1), [[C2]], [[C3]] - ; GFX8-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL2]], [[SELECT2]] - ; GFX8-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD1]](s32) - ; GFX8-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[FCMP1]](s1), [[C4]], [[C5]] - ; GFX8-NEXT: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[INT1]], [[SELECT3]] - ; GFX8-NEXT: [[FMUL4:%[0-9]+]]:_(s32) = G_FMUL [[UV2]], [[C]] - ; GFX8-NEXT: [[FCMP2:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[FMUL4]](s32), [[C1]] - ; GFX8-NEXT: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[FCMP2]](s1), [[C2]], [[C3]] - ; GFX8-NEXT: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FMUL4]], [[SELECT4]] - ; GFX8-NEXT: [[INT2:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD2]](s32) - ; GFX8-NEXT: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[FCMP2]](s1), [[C4]], [[C5]] - ; GFX8-NEXT: [[FMUL5:%[0-9]+]]:_(s32) = G_FMUL [[INT2]], [[SELECT5]] - ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[FMUL1]](s32), [[FMUL3]](s32), [[FMUL5]](s32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -4096 + ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]] + ; GFX8-NEXT: [[FSUB:%[0-9]+]]:_(s32) = G_FSUB [[UV]], [[AND]] + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3FF7140000000000 + ; GFX8-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AND]], [[C1]] + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3F347652A0000000 + ; GFX8-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[FSUB]], [[C2]] + ; GFX8-NEXT: 
[[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[FSUB]], [[C1]] + ; GFX8-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL2]], [[FMUL1]] + ; GFX8-NEXT: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[AND]], [[C2]] + ; GFX8-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[FADD]] + ; GFX8-NEXT: [[FRINT:%[0-9]+]]:_(s32) = G_FRINT [[FMUL]] + ; GFX8-NEXT: [[FSUB1:%[0-9]+]]:_(s32) = G_FSUB [[FMUL]], [[FRINT]] + ; GFX8-NEXT: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FSUB1]], [[FADD1]] + ; GFX8-NEXT: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[FRINT]](s32) + ; GFX8-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD2]](s32) + ; GFX8-NEXT: [[FLDEXP:%[0-9]+]]:_(s32) = G_FLDEXP [[INT]], [[FPTOSI]](s32) + ; GFX8-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC059D1DA00000000 + ; GFX8-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 + ; GFX8-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[UV]](s32), [[C3]] + ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[C4]], [[FLDEXP]] + ; GFX8-NEXT: [[C5:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x40562E4300000000 + ; GFX8-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(ogt), [[UV]](s32), [[C5]] + ; GFX8-NEXT: [[C6:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x7FF0000000000000 + ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[FCMP1]](s1), [[C6]], [[SELECT]] + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[SELECT1]](s32) + ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C]] + ; GFX8-NEXT: [[FSUB2:%[0-9]+]]:_(s32) = G_FSUB [[UV1]], [[AND1]] + ; GFX8-NEXT: [[FMUL4:%[0-9]+]]:_(s32) = G_FMUL [[AND1]], [[C1]] + ; GFX8-NEXT: [[FMUL5:%[0-9]+]]:_(s32) = G_FMUL [[FSUB2]], [[C2]] + ; GFX8-NEXT: [[FMUL6:%[0-9]+]]:_(s32) = G_FMUL [[FSUB2]], [[C1]] + ; GFX8-NEXT: [[FADD3:%[0-9]+]]:_(s32) = G_FADD [[FMUL6]], [[FMUL5]] + ; GFX8-NEXT: [[FMUL7:%[0-9]+]]:_(s32) = G_FMUL [[AND1]], [[C2]] + ; GFX8-NEXT: [[FADD4:%[0-9]+]]:_(s32) = G_FADD [[FMUL7]], [[FADD3]] + ; GFX8-NEXT: [[FRINT1:%[0-9]+]]:_(s32) = G_FRINT [[FMUL4]] + ; 
GFX8-NEXT: [[FSUB3:%[0-9]+]]:_(s32) = G_FSUB [[FMUL4]], [[FRINT1]] + ; GFX8-NEXT: [[FADD5:%[0-9]+]]:_(s32) = G_FADD [[FSUB3]], [[FADD4]] + ; GFX8-NEXT: [[FPTOSI1:%[0-9]+]]:_(s32) = G_FPTOSI [[FRINT1]](s32) + ; GFX8-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD5]](s32) + ; GFX8-NEXT: [[FLDEXP1:%[0-9]+]]:_(s32) = G_FLDEXP [[INT1]], [[FPTOSI1]](s32) + ; GFX8-NEXT: [[FCMP2:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[UV1]](s32), [[C3]] + ; GFX8-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[FCMP2]](s1), [[C4]], [[FLDEXP1]] + ; GFX8-NEXT: [[FCMP3:%[0-9]+]]:_(s1) = G_FCMP floatpred(ogt), [[UV1]](s32), [[C5]] + ; GFX8-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[FCMP3]](s1), [[C6]], [[SELECT2]] + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[SELECT3]](s32) + ; GFX8-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[C]] + ; GFX8-NEXT: [[FSUB4:%[0-9]+]]:_(s32) = G_FSUB [[UV2]], [[AND2]] + ; GFX8-NEXT: [[FMUL8:%[0-9]+]]:_(s32) = G_FMUL [[AND2]], [[C1]] + ; GFX8-NEXT: [[FMUL9:%[0-9]+]]:_(s32) = G_FMUL [[FSUB4]], [[C2]] + ; GFX8-NEXT: [[FMUL10:%[0-9]+]]:_(s32) = G_FMUL [[FSUB4]], [[C1]] + ; GFX8-NEXT: [[FADD6:%[0-9]+]]:_(s32) = G_FADD [[FMUL10]], [[FMUL9]] + ; GFX8-NEXT: [[FMUL11:%[0-9]+]]:_(s32) = G_FMUL [[AND2]], [[C2]] + ; GFX8-NEXT: [[FADD7:%[0-9]+]]:_(s32) = G_FADD [[FMUL11]], [[FADD6]] + ; GFX8-NEXT: [[FRINT2:%[0-9]+]]:_(s32) = G_FRINT [[FMUL8]] + ; GFX8-NEXT: [[FSUB5:%[0-9]+]]:_(s32) = G_FSUB [[FMUL8]], [[FRINT2]] + ; GFX8-NEXT: [[FADD8:%[0-9]+]]:_(s32) = G_FADD [[FSUB5]], [[FADD7]] + ; GFX8-NEXT: [[FPTOSI2:%[0-9]+]]:_(s32) = G_FPTOSI [[FRINT2]](s32) + ; GFX8-NEXT: [[INT2:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD8]](s32) + ; GFX8-NEXT: [[FLDEXP2:%[0-9]+]]:_(s32) = G_FLDEXP [[INT2]], [[FPTOSI2]](s32) + ; GFX8-NEXT: [[FCMP4:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[UV2]](s32), [[C3]] + ; GFX8-NEXT: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[FCMP4]](s1), [[C4]], [[FLDEXP2]] + ; GFX8-NEXT: [[FCMP5:%[0-9]+]]:_(s1) = 
G_FCMP floatpred(ogt), [[UV2]](s32), [[C5]] + ; GFX8-NEXT: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[FCMP5]](s1), [[C6]], [[SELECT4]] + ; GFX8-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[SELECT5]](s32) + ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) ; GFX9-LABEL: name: test_fexp_v3s32 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2 @@ -308,32 +484,56 @@ ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3FF7154760000000 ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UV]], [[C]] - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float -1.260000e+02 - ; GFX9-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[FMUL]](s32), [[C1]] - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 6.400000e+01 - ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 - ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[C2]], [[C3]] - ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[SELECT]] + ; GFX9-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[FMUL]] + ; GFX9-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[UV]], [[C]], [[FNEG]] + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3E54AE0BE0000000 + ; GFX9-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[UV]], [[C1]], [[FMA]] + ; GFX9-NEXT: [[FRINT:%[0-9]+]]:_(s32) = G_FRINT [[FMUL]] + ; GFX9-NEXT: [[FSUB:%[0-9]+]]:_(s32) = G_FSUB [[FMUL]], [[FRINT]] + ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FSUB]], [[FMA1]] + ; GFX9-NEXT: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[FRINT]](s32) ; GFX9-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD]](s32) - ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3BF0000000000000 - ; GFX9-NEXT: [[C5:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 - ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = 
G_SELECT [[FCMP]](s1), [[C4]], [[C5]] - ; GFX9-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[INT]], [[SELECT1]] - ; GFX9-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[UV1]], [[C]] - ; GFX9-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[FMUL2]](s32), [[C1]] - ; GFX9-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[FCMP1]](s1), [[C2]], [[C3]] - ; GFX9-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL2]], [[SELECT2]] + ; GFX9-NEXT: [[FLDEXP:%[0-9]+]]:_(s32) = G_FLDEXP [[INT]], [[FPTOSI]](s32) + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC059D1DA00000000 + ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 + ; GFX9-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[UV]](s32), [[C2]] + ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[C3]], [[FLDEXP]] + ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x40562E4300000000 + ; GFX9-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(ogt), [[UV]](s32), [[C4]] + ; GFX9-NEXT: [[C5:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x7FF0000000000000 + ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[FCMP1]](s1), [[C5]], [[SELECT]] + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[SELECT1]](s32) + ; GFX9-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[UV1]], [[C]] + ; GFX9-NEXT: [[FNEG1:%[0-9]+]]:_(s32) = G_FNEG [[FMUL1]] + ; GFX9-NEXT: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[UV1]], [[C]], [[FNEG1]] + ; GFX9-NEXT: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[UV1]], [[C1]], [[FMA2]] + ; GFX9-NEXT: [[FRINT1:%[0-9]+]]:_(s32) = G_FRINT [[FMUL1]] + ; GFX9-NEXT: [[FSUB1:%[0-9]+]]:_(s32) = G_FSUB [[FMUL1]], [[FRINT1]] + ; GFX9-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FSUB1]], [[FMA3]] + ; GFX9-NEXT: [[FPTOSI1:%[0-9]+]]:_(s32) = G_FPTOSI [[FRINT1]](s32) ; GFX9-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD1]](s32) - ; GFX9-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[FCMP1]](s1), [[C4]], [[C5]] - ; GFX9-NEXT: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[INT1]], [[SELECT3]] - ; GFX9-NEXT: 
[[FMUL4:%[0-9]+]]:_(s32) = G_FMUL [[UV2]], [[C]] - ; GFX9-NEXT: [[FCMP2:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[FMUL4]](s32), [[C1]] - ; GFX9-NEXT: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[FCMP2]](s1), [[C2]], [[C3]] - ; GFX9-NEXT: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FMUL4]], [[SELECT4]] + ; GFX9-NEXT: [[FLDEXP1:%[0-9]+]]:_(s32) = G_FLDEXP [[INT1]], [[FPTOSI1]](s32) + ; GFX9-NEXT: [[FCMP2:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[UV1]](s32), [[C2]] + ; GFX9-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[FCMP2]](s1), [[C3]], [[FLDEXP1]] + ; GFX9-NEXT: [[FCMP3:%[0-9]+]]:_(s1) = G_FCMP floatpred(ogt), [[UV1]](s32), [[C4]] + ; GFX9-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[FCMP3]](s1), [[C5]], [[SELECT2]] + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[SELECT3]](s32) + ; GFX9-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[UV2]], [[C]] + ; GFX9-NEXT: [[FNEG2:%[0-9]+]]:_(s32) = G_FNEG [[FMUL2]] + ; GFX9-NEXT: [[FMA4:%[0-9]+]]:_(s32) = G_FMA [[UV2]], [[C]], [[FNEG2]] + ; GFX9-NEXT: [[FMA5:%[0-9]+]]:_(s32) = G_FMA [[UV2]], [[C1]], [[FMA4]] + ; GFX9-NEXT: [[FRINT2:%[0-9]+]]:_(s32) = G_FRINT [[FMUL2]] + ; GFX9-NEXT: [[FSUB2:%[0-9]+]]:_(s32) = G_FSUB [[FMUL2]], [[FRINT2]] + ; GFX9-NEXT: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FSUB2]], [[FMA5]] + ; GFX9-NEXT: [[FPTOSI2:%[0-9]+]]:_(s32) = G_FPTOSI [[FRINT2]](s32) ; GFX9-NEXT: [[INT2:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD2]](s32) - ; GFX9-NEXT: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[FCMP2]](s1), [[C4]], [[C5]] - ; GFX9-NEXT: [[FMUL5:%[0-9]+]]:_(s32) = G_FMUL [[INT2]], [[SELECT5]] - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[FMUL1]](s32), [[FMUL3]](s32), [[FMUL5]](s32) + ; GFX9-NEXT: [[FLDEXP2:%[0-9]+]]:_(s32) = G_FLDEXP [[INT2]], [[FPTOSI2]](s32) + ; GFX9-NEXT: [[FCMP4:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[UV2]](s32), [[C2]] + ; GFX9-NEXT: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[FCMP4]](s1), [[C3]], [[FLDEXP2]] + ; GFX9-NEXT: [[FCMP5:%[0-9]+]]:_(s1) = G_FCMP 
floatpred(ogt), [[UV2]](s32), [[C4]] + ; GFX9-NEXT: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[FCMP5]](s1), [[C5]], [[SELECT4]] + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[SELECT5]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 %1:_(<3 x s32>) = G_FEXP %0 @@ -354,18 +554,8 @@ ; GFX6-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3FF7154760000000 ; GFX6-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT]], [[C]] - ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float -1.260000e+02 - ; GFX6-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[FMUL]](s32), [[C1]] - ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 6.400000e+01 - ; GFX6-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 - ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[C2]], [[C3]] - ; GFX6-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[SELECT]] - ; GFX6-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD]](s32) - ; GFX6-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3BF0000000000000 - ; GFX6-NEXT: [[C5:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 - ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[C4]], [[C5]] - ; GFX6-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[INT]], [[SELECT1]] - ; GFX6-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL1]](s32) + ; GFX6-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FMUL]](s32) + ; GFX6-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT]](s32) ; GFX6-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16) ; GFX6-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX8-LABEL: name: test_fexp_s16 @@ -373,20 +563,24 @@ ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX8-NEXT: 
[[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH3DC5 - ; GFX8-NEXT: [[FMUL:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC]], [[C]] - ; GFX8-NEXT: [[FEXP2_:%[0-9]+]]:_(s16) = G_FEXP2 [[FMUL]] - ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FEXP2_]](s16) + ; GFX8-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3FF7154760000000 + ; GFX8-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT]], [[C]] + ; GFX8-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FMUL]](s32) + ; GFX8-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT]](s32) + ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16) ; GFX8-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX9-LABEL: name: test_fexp_s16 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH3DC5 - ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC]], [[C]] - ; GFX9-NEXT: [[FEXP2_:%[0-9]+]]:_(s16) = G_FEXP2 [[FMUL]] - ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FEXP2_]](s16) + ; GFX9-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3FF7154760000000 + ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT]], [[C]] + ; GFX9-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FMUL]](s32) + ; GFX9-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT]](s32) + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16) ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s16) = G_TRUNC %0 @@ -408,31 +602,16 @@ ; GFX6-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT %4(s16) ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3FF7154760000000 ; GFX6-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT]], [[C]] - ; GFX6-NEXT: 
[[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float -1.260000e+02 - ; GFX6-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[FMUL]](s32), [[C1]] - ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 6.400000e+01 - ; GFX6-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 - ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[C2]], [[C3]] - ; GFX6-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[SELECT]] - ; GFX6-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD]](s32) - ; GFX6-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3BF0000000000000 - ; GFX6-NEXT: [[C5:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 - ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[C4]], [[C5]] - ; GFX6-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[INT]], [[SELECT1]] - ; GFX6-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL1]](s32) + ; GFX6-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FMUL]](s32) + ; GFX6-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT]](s32) ; GFX6-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT %5(s16) - ; GFX6-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT1]], [[C]] - ; GFX6-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[FMUL2]](s32), [[C1]] - ; GFX6-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[FCMP1]](s1), [[C2]], [[C3]] - ; GFX6-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL2]], [[SELECT2]] - ; GFX6-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD1]](s32) - ; GFX6-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[FCMP1]](s1), [[C4]], [[C5]] - ; GFX6-NEXT: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[INT1]], [[SELECT3]] - ; GFX6-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL3]](s32) + ; GFX6-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT1]], [[C]] + ; GFX6-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FMUL1]](s32) + ; GFX6-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT1]](s32) ; GFX6-NEXT: 
[[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC]](s16) ; GFX6-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16) - ; GFX6-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C6]](s32) + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32) ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] ; GFX6-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; GFX6-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) @@ -440,13 +619,17 @@ ; GFX8: liveins: $vgpr0 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH3DC5 - ; GFX8-NEXT: [[FMUL:%[0-9]+]]:_(s16) = G_FMUL %4, [[C]] - ; GFX8-NEXT: [[FEXP2_:%[0-9]+]]:_(s16) = G_FEXP2 [[FMUL]] - ; GFX8-NEXT: [[FMUL1:%[0-9]+]]:_(s16) = G_FMUL %5, [[C]] - ; GFX8-NEXT: [[FEXP2_1:%[0-9]+]]:_(s16) = G_FEXP2 [[FMUL1]] - ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FEXP2_]](s16) - ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FEXP2_1]](s16) + ; GFX8-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT %4(s16) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3FF7154760000000 + ; GFX8-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT]], [[C]] + ; GFX8-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FMUL]](s32) + ; GFX8-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT]](s32) + ; GFX8-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT %5(s16) + ; GFX8-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT1]], [[C]] + ; GFX8-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FMUL1]](s32) + ; GFX8-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT1]](s32) + ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC]](s16) + ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16) ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], 
[[C1]](s32) ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] @@ -456,12 +639,16 @@ ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH3DC5 - ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(s16) = G_FMUL %4, [[C]] - ; GFX9-NEXT: [[FEXP2_:%[0-9]+]]:_(s16) = G_FEXP2 [[FMUL]] - ; GFX9-NEXT: [[FMUL1:%[0-9]+]]:_(s16) = G_FMUL %5, [[C]] - ; GFX9-NEXT: [[FEXP2_1:%[0-9]+]]:_(s16) = G_FEXP2 [[FMUL1]] - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FEXP2_]](s16), [[FEXP2_1]](s16) + ; GFX9-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT %4(s16) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3FF7154760000000 + ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT]], [[C]] + ; GFX9-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FMUL]](s32) + ; GFX9-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT]](s32) + ; GFX9-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT %5(s16) + ; GFX9-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT1]], [[C]] + ; GFX9-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FMUL1]](s32) + ; GFX9-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT1]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FPTRUNC]](s16), [[FPTRUNC1]](s16) ; GFX9-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) %0:_(<2 x s16>) = COPY $vgpr0 %1:_(<2 x s16>) = G_FEXP %1 Index: llvm/test/CodeGen/AMDGPU/llvm.exp.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/llvm.exp.ll +++ llvm/test/CodeGen/AMDGPU/llvm.exp.ll @@ -13,19 +13,32 @@ ; VI-SDAG-LABEL: s_exp_f32: ; VI-SDAG: ; %bb.0: ; VI-SDAG-NEXT: s_load_dword s2, s[0:1], 0x2c +; VI-SDAG-NEXT: v_mov_b32_e32 v0, 0x3fb8a000 ; VI-SDAG-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 -; VI-SDAG-NEXT: v_mov_b32_e32 v0, 0x3fb8aa3b -; VI-SDAG-NEXT: s_mov_b32 s3, 0xc2fc0000 -; VI-SDAG-NEXT: v_mov_b32_e32 
v1, 0x42800000 ; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0) -; VI-SDAG-NEXT: v_mul_f32_e32 v0, s2, v0 -; VI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s3, v0 -; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; VI-SDAG-NEXT: s_and_b32 s3, s2, 0xfffff000 +; VI-SDAG-NEXT: v_mov_b32_e32 v1, s3 +; VI-SDAG-NEXT: v_sub_f32_e32 v1, s2, v1 +; VI-SDAG-NEXT: v_mul_f32_e32 v3, 0x39a3b295, v1 +; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8a000, v1 +; VI-SDAG-NEXT: v_mul_f32_e32 v0, s3, v0 +; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v3 +; VI-SDAG-NEXT: v_mov_b32_e32 v3, 0x39a3b295 +; VI-SDAG-NEXT: v_rndne_f32_e32 v2, v0 +; VI-SDAG-NEXT: v_mul_f32_e32 v3, s3, v3 +; VI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v2 +; VI-SDAG-NEXT: v_add_f32_e32 v1, v3, v1 ; VI-SDAG-NEXT: v_add_f32_e32 v0, v0, v1 ; VI-SDAG-NEXT: v_exp_f32_e32 v0, v0 -; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x1f800000 -; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; VI-SDAG-NEXT: v_mul_f32_e32 v2, v0, v1 +; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v1, v2 +; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000 +; VI-SDAG-NEXT: v_ldexp_f32 v0, v0, v1 +; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0xc2ce8ed0 +; VI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s2, v1 +; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x42b17218 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc +; VI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s2, v1 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v2, v2, v0, vcc ; VI-SDAG-NEXT: v_mov_b32_e32 v0, s0 ; VI-SDAG-NEXT: v_mov_b32_e32 v1, s1 ; VI-SDAG-NEXT: flat_store_dword v[0:1], v2 @@ -34,19 +47,32 @@ ; VI-GISEL-LABEL: s_exp_f32: ; VI-GISEL: ; %bb.0: ; VI-GISEL-NEXT: s_load_dword s2, s[0:1], 0x2c +; VI-GISEL-NEXT: v_mov_b32_e32 v0, 0x3fb8a000 +; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x39a3b295 ; VI-GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 -; VI-GISEL-NEXT: v_mov_b32_e32 v0, 0x3fb8aa3b -; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000 -; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000 ; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0) -; VI-GISEL-NEXT: v_mul_f32_e32 v0, s2, v0 -; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 -; 
VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc +; VI-GISEL-NEXT: s_and_b32 s3, s2, 0xfffff000 +; VI-GISEL-NEXT: v_mov_b32_e32 v2, s3 +; VI-GISEL-NEXT: v_sub_f32_e32 v2, s2, v2 +; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x39a3b295, v2 +; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v2 +; VI-GISEL-NEXT: v_mul_f32_e32 v0, s3, v0 +; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v3 +; VI-GISEL-NEXT: v_mul_f32_e32 v1, s3, v1 +; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 +; VI-GISEL-NEXT: v_rndne_f32_e32 v2, v0 +; VI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v2 ; VI-GISEL-NEXT: v_add_f32_e32 v0, v0, v1 +; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v1, v2 ; VI-GISEL-NEXT: v_exp_f32_e32 v0, v0 -; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x1f800000 -; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; VI-GISEL-NEXT: v_mul_f32_e32 v2, v0, v1 +; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000 +; VI-GISEL-NEXT: v_ldexp_f32 v0, v0, v1 +; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2ce8ed0 +; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s2, v1 +; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x42b17218 +; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc +; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s2, v1 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v2, v0, v2, vcc ; VI-GISEL-NEXT: v_mov_b32_e32 v0, s0 ; VI-GISEL-NEXT: v_mov_b32_e32 v1, s1 ; VI-GISEL-NEXT: flat_store_dword v[0:1], v2 @@ -54,128 +80,254 @@ ; ; GFX900-SDAG-LABEL: s_exp_f32: ; GFX900-SDAG: ; %bb.0: -; GFX900-SDAG-NEXT: s_load_dword s4, s[0:1], 0x2c -; GFX900-SDAG-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 +; GFX900-SDAG-NEXT: s_load_dword s2, s[0:1], 0x2c ; GFX900-SDAG-NEXT: v_mov_b32_e32 v0, 0x3fb8aa3b -; GFX900-SDAG-NEXT: s_mov_b32 s0, 0xc2fc0000 -; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x42800000 +; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x32a5705f +; GFX900-SDAG-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 ; GFX900-SDAG-NEXT: s_waitcnt lgkmcnt(0) -; GFX900-SDAG-NEXT: v_mul_f32_e32 v0, s4, v0 -; GFX900-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s0, v0 -; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc -; 
GFX900-SDAG-NEXT: v_add_f32_e32 v0, v0, v1 +; GFX900-SDAG-NEXT: v_mul_f32_e32 v2, s2, v0 +; GFX900-SDAG-NEXT: v_rndne_f32_e32 v3, v2 +; GFX900-SDAG-NEXT: v_fma_f32 v0, s2, v0, -v2 +; GFX900-SDAG-NEXT: v_sub_f32_e32 v2, v2, v3 +; GFX900-SDAG-NEXT: v_fma_f32 v0, s2, v1, v0 +; GFX900-SDAG-NEXT: v_add_f32_e32 v0, v2, v0 +; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v1, v3 ; GFX900-SDAG-NEXT: v_exp_f32_e32 v0, v0 -; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0x1f800000 -; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc -; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0 -; GFX900-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2 -; GFX900-SDAG-NEXT: global_store_dword v1, v0, s[2:3] +; GFX900-SDAG-NEXT: v_mov_b32_e32 v3, 0x7f800000 +; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0 +; GFX900-SDAG-NEXT: v_ldexp_f32 v0, v0, v1 +; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0xc2ce8ed0 +; GFX900-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s2, v1 +; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x42b17218 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc +; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s2, v1 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc +; GFX900-SDAG-NEXT: global_store_dword v2, v0, s[0:1] ; GFX900-SDAG-NEXT: s_endpgm ; ; GFX900-GISEL-LABEL: s_exp_f32: ; GFX900-GISEL: ; %bb.0: ; GFX900-GISEL-NEXT: s_load_dword s2, s[0:1], 0x2c -; GFX900-GISEL-NEXT: v_mov_b32_e32 v0, 0x3fb8aa3b -; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000 -; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000 +; GFX900-GISEL-NEXT: s_mov_b32 s3, 0x3fb8aa3b +; GFX900-GISEL-NEXT: v_mov_b32_e32 v0, 0x32a5705f ; GFX900-GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 ; GFX900-GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, s2, v0 -; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 -; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc -; GFX900-GISEL-NEXT: v_add_f32_e32 v0, v0, v1 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, s2 +; GFX900-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8aa3b, v1 +; GFX900-GISEL-NEXT: v_fma_f32 v1, v1, s3, -v2 +; 
GFX900-GISEL-NEXT: v_rndne_f32_e32 v3, v2 +; GFX900-GISEL-NEXT: v_fma_f32 v0, s2, v0, v1 +; GFX900-GISEL-NEXT: v_sub_f32_e32 v1, v2, v3 +; GFX900-GISEL-NEXT: v_add_f32_e32 v0, v1, v0 +; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v1, v3 ; GFX900-GISEL-NEXT: v_exp_f32_e32 v0, v0 -; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x1f800000 -; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 +; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s2, v2 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000 +; GFX900-GISEL-NEXT: v_ldexp_f32 v0, v0, v1 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x42b17218 +; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc +; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s2, v1 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc ; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0 ; GFX900-GISEL-NEXT: global_store_dword v1, v0, s[0:1] ; GFX900-GISEL-NEXT: s_endpgm ; ; SI-SDAG-LABEL: s_exp_f32: ; SI-SDAG: ; %bb.0: -; SI-SDAG-NEXT: s_load_dword s2, s[0:1], 0xb -; SI-SDAG-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SI-SDAG-NEXT: s_load_dword s4, s[0:1], 0xb ; SI-SDAG-NEXT: v_mov_b32_e32 v0, 0x3fb8aa3b -; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2fc0000 -; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x42800000 +; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x32a5705f +; SI-SDAG-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SI-SDAG-NEXT: s_mov_b32 s3, 0xf000 ; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0) -; SI-SDAG-NEXT: v_mul_f32_e32 v0, s2, v0 -; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 -; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc -; SI-SDAG-NEXT: v_add_f32_e32 v0, v0, v1 +; SI-SDAG-NEXT: v_mul_f32_e32 v2, s4, v0 +; SI-SDAG-NEXT: v_rndne_f32_e32 v3, v2 +; SI-SDAG-NEXT: v_fma_f32 v0, s4, v0, -v2 +; SI-SDAG-NEXT: v_sub_f32_e32 v2, v2, v3 +; SI-SDAG-NEXT: v_fma_f32 v0, s4, v1, v0 +; SI-SDAG-NEXT: v_add_f32_e32 v0, v2, v0 ; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0 -; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x1f800000 -; SI-SDAG-NEXT: 
v_cndmask_b32_e32 v1, 1.0, v1, vcc -; SI-SDAG-NEXT: s_mov_b32 s3, 0xf000 +; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v1, v3 +; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000 ; SI-SDAG-NEXT: s_mov_b32 s2, -1 -; SI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 +; SI-SDAG-NEXT: v_ldexp_f32_e32 v0, v0, v1 +; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0xc2ce8ed0 +; SI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v1 +; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x42b17218 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc +; SI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v1 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc ; SI-SDAG-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; SI-SDAG-NEXT: s_endpgm ; ; SI-GISEL-LABEL: s_exp_f32: ; SI-GISEL: ; %bb.0: ; SI-GISEL-NEXT: s_load_dword s2, s[0:1], 0xb -; SI-GISEL-NEXT: v_mov_b32_e32 v0, 0x3fb8aa3b -; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000 -; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000 +; SI-GISEL-NEXT: s_mov_b32 s3, 0x3fb8aa3b +; SI-GISEL-NEXT: v_mov_b32_e32 v0, 0x32a5705f ; SI-GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 ; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0) -; SI-GISEL-NEXT: v_mul_f32_e32 v0, s2, v0 -; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 -; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc -; SI-GISEL-NEXT: v_add_f32_e32 v0, v0, v1 +; SI-GISEL-NEXT: v_mov_b32_e32 v1, s2 +; SI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8aa3b, v1 +; SI-GISEL-NEXT: v_fma_f32 v1, v1, s3, -v2 +; SI-GISEL-NEXT: v_rndne_f32_e32 v3, v2 +; SI-GISEL-NEXT: v_fma_f32 v0, s2, v0, v1 +; SI-GISEL-NEXT: v_sub_f32_e32 v1, v2, v3 +; SI-GISEL-NEXT: v_add_f32_e32 v0, v1, v0 +; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v1, v3 ; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0 -; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x1f800000 -; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc +; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 +; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s2, v2 +; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000 +; SI-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v1 +; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x42b17218 +; SI-GISEL-NEXT: 
v_cndmask_b32_e64 v0, v0, 0, vcc +; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s2, v1 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc ; SI-GISEL-NEXT: s_mov_b32 s2, -1 -; SI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 ; SI-GISEL-NEXT: s_mov_b32 s3, 0xf000 ; SI-GISEL-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; SI-GISEL-NEXT: s_endpgm ; ; R600-LABEL: s_exp_f32: ; R600: ; %bb.0: -; R600-NEXT: ALU 12, @4, KC0[CB0:0-32], KC1[] +; R600-NEXT: ALU 59, @4, KC0[CB0:0-32], KC1[] ; R600-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1 ; R600-NEXT: CF_END ; R600-NEXT: PAD ; R600-NEXT: ALU clause starting at 4: -; R600-NEXT: MUL_IEEE * T0.W, KC0[2].Z, literal.x, -; R600-NEXT: 1069066811(1.442695e+00), 0(0.000000e+00) -; R600-NEXT: SETGT * T1.W, literal.x, PV.W, -; R600-NEXT: -1023672320(-1.260000e+02), 0(0.000000e+00) -; R600-NEXT: CNDE * T2.W, PV.W, 0.0, literal.x, -; R600-NEXT: 1115684864(6.400000e+01), 0(0.000000e+00) -; R600-NEXT: ADD T0.W, T0.W, PV.W, -; R600-NEXT: CNDE * T1.W, T1.W, 1.0, literal.x, -; R600-NEXT: 528482304(5.421011e-20), 0(0.000000e+00) -; R600-NEXT: EXP_IEEE * T0.X, PV.W, -; R600-NEXT: MUL_IEEE T0.X, PS, T1.W, -; R600-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, -; R600-NEXT: 2(2.802597e-45), 0(0.000000e+00) +; R600-NEXT: AND_INT * T0.W, KC0[2].Z, literal.x, +; R600-NEXT: -4096(nan), 0(0.000000e+00) +; R600-NEXT: ADD T1.W, KC0[2].Z, -PV.W, +; R600-NEXT: MUL_IEEE * T2.W, PV.W, literal.x, +; R600-NEXT: 1069064192(1.442383e+00), 0(0.000000e+00) +; R600-NEXT: RNDNE T3.W, PS, +; R600-NEXT: MUL_IEEE * T4.W, PV.W, literal.x, +; R600-NEXT: 967029397(3.122284e-04), 0(0.000000e+00) +; R600-NEXT: MULADD_IEEE T1.W, T1.W, literal.x, PS, +; R600-NEXT: TRUNC * T4.W, PV.W, +; R600-NEXT: 1069064192(1.442383e+00), 0(0.000000e+00) +; R600-NEXT: FLT_TO_INT T0.Z, PS, +; R600-NEXT: MULADD_IEEE T0.W, T0.W, literal.x, PV.W, +; R600-NEXT: ADD * T1.W, T2.W, -T3.W, +; R600-NEXT: 967029397(3.122284e-04), 0(0.000000e+00) +; R600-NEXT: ADD T1.Z, PS, PV.W, +; R600-NEXT: MAX_INT T0.W, PV.Z, 
literal.x, +; R600-NEXT: MIN_INT * T1.W, PV.Z, literal.y, +; R600-NEXT: -330(nan), 381(5.338947e-43) +; R600-NEXT: ADD_INT T0.X, PS, literal.x, +; R600-NEXT: ADD_INT T0.Y, PV.W, literal.y, +; R600-NEXT: ADD_INT T2.Z, T0.Z, literal.z, +; R600-NEXT: SETGT_UINT T0.W, T0.Z, literal.w, +; R600-NEXT: EXP_IEEE * T1.X, PV.Z, +; R600-NEXT: -254(nan), 204(2.858649e-43) +; R600-NEXT: 102(1.429324e-43), -229(nan) +; R600-NEXT: ADD_INT T2.X, T0.Z, literal.x, +; R600-NEXT: SETGT_UINT T1.Y, T0.Z, literal.y, +; R600-NEXT: CNDE_INT T1.Z, PV.W, PV.Y, PV.Z, +; R600-NEXT: SETGT_INT T1.W, T0.Z, literal.x, +; R600-NEXT: MUL_IEEE * T2.W, PS, literal.z, +; R600-NEXT: -127(nan), 254(3.559298e-43) +; R600-NEXT: 209715200(1.972152e-31), 0(0.000000e+00) +; R600-NEXT: MUL_IEEE T3.X, T1.X, literal.x, +; R600-NEXT: MUL_IEEE T0.Y, PS, literal.y, +; R600-NEXT: CNDE_INT T1.Z, PV.W, PV.Z, T0.Z, +; R600-NEXT: CNDE_INT T3.W, PV.Y, PV.X, T0.X, +; R600-NEXT: SETGT_INT * T4.W, T0.Z, literal.z, +; R600-NEXT: 2130706432(1.701412e+38), 209715200(1.972152e-31) +; R600-NEXT: 127(1.779649e-43), 0(0.000000e+00) +; R600-NEXT: CNDE_INT T0.Z, PS, PV.Z, PV.W, +; R600-NEXT: CNDE_INT T0.W, T0.W, PV.Y, T2.W, +; R600-NEXT: MUL_IEEE * T2.W, PV.X, literal.x, +; R600-NEXT: 2130706432(1.701412e+38), 0(0.000000e+00) +; R600-NEXT: CNDE_INT T1.Z, T1.Y, T3.X, PS, +; R600-NEXT: CNDE_INT T0.W, T1.W, PV.W, T1.X, +; R600-NEXT: LSHL * T1.W, PV.Z, literal.x, +; R600-NEXT: 23(3.222986e-44), 0(0.000000e+00) +; R600-NEXT: ADD_INT T1.W, PS, literal.x, +; R600-NEXT: CNDE_INT * T0.W, T4.W, PV.W, PV.Z, +; R600-NEXT: 1065353216(1.000000e+00), 0(0.000000e+00) +; R600-NEXT: MUL_IEEE T0.W, PS, PV.W, +; R600-NEXT: SETGT * T1.W, literal.x, KC0[2].Z, +; R600-NEXT: -1026650416(-1.032789e+02), 0(0.000000e+00) +; R600-NEXT: CNDE T0.W, PS, PV.W, 0.0, +; R600-NEXT: SETGT * T1.W, KC0[2].Z, literal.x, +; R600-NEXT: 1118925336(8.872284e+01), 0(0.000000e+00) +; R600-NEXT: CNDE T0.X, PS, PV.W, literal.x, +; R600-NEXT: LSHR * T1.X, KC0[2].Y, literal.y, +; 
R600-NEXT: 2139095040(INF), 2(2.802597e-45) ; ; CM-LABEL: s_exp_f32: ; CM: ; %bb.0: -; CM-NEXT: ALU 15, @4, KC0[CB0:0-32], KC1[] +; CM-NEXT: ALU 64, @4, KC0[CB0:0-32], KC1[] ; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T0.X, T1.X ; CM-NEXT: CF_END ; CM-NEXT: PAD ; CM-NEXT: ALU clause starting at 4: -; CM-NEXT: MUL_IEEE * T0.W, KC0[2].Z, literal.x, -; CM-NEXT: 1069066811(1.442695e+00), 0(0.000000e+00) -; CM-NEXT: SETGT * T1.W, literal.x, PV.W, -; CM-NEXT: -1023672320(-1.260000e+02), 0(0.000000e+00) -; CM-NEXT: CNDE * T2.W, PV.W, 0.0, literal.x, -; CM-NEXT: 1115684864(6.400000e+01), 0(0.000000e+00) -; CM-NEXT: CNDE T0.Z, T1.W, 1.0, literal.x, -; CM-NEXT: ADD * T0.W, T0.W, PV.W, BS:VEC_120/SCL_212 -; CM-NEXT: 528482304(5.421011e-20), 0(0.000000e+00) -; CM-NEXT: EXP_IEEE T0.X, T0.W, -; CM-NEXT: EXP_IEEE T0.Y (MASKED), T0.W, -; CM-NEXT: EXP_IEEE T0.Z (MASKED), T0.W, -; CM-NEXT: EXP_IEEE * T0.W (MASKED), T0.W, -; CM-NEXT: MUL_IEEE * T0.X, PV.X, T0.Z, +; CM-NEXT: AND_INT * T0.W, KC0[2].Z, literal.x, +; CM-NEXT: -4096(nan), 0(0.000000e+00) +; CM-NEXT: MUL_IEEE T0.Z, PV.W, literal.x, +; CM-NEXT: ADD * T1.W, KC0[2].Z, -PV.W, +; CM-NEXT: 1069064192(1.442383e+00), 0(0.000000e+00) +; CM-NEXT: MUL_IEEE T1.Z, PV.W, literal.x, +; CM-NEXT: RNDNE * T2.W, PV.Z, +; CM-NEXT: 967029397(3.122284e-04), 0(0.000000e+00) +; CM-NEXT: TRUNC T2.Z, PV.W, +; CM-NEXT: MULADD_IEEE * T1.W, T1.W, literal.x, PV.Z, +; CM-NEXT: 1069064192(1.442383e+00), 0(0.000000e+00) +; CM-NEXT: MULADD_IEEE T0.Y, T0.W, literal.x, PV.W, +; CM-NEXT: ADD T0.Z, T0.Z, -T2.W, +; CM-NEXT: FLT_TO_INT * T0.W, PV.Z, +; CM-NEXT: 967029397(3.122284e-04), 0(0.000000e+00) +; CM-NEXT: MIN_INT T1.Z, PV.W, literal.x, +; CM-NEXT: ADD * T1.W, PV.Z, PV.Y, +; CM-NEXT: 381(5.338947e-43), 0(0.000000e+00) +; CM-NEXT: EXP_IEEE T0.X, T1.W, +; CM-NEXT: EXP_IEEE T0.Y (MASKED), T1.W, +; CM-NEXT: EXP_IEEE T0.Z (MASKED), T1.W, +; CM-NEXT: EXP_IEEE * T0.W (MASKED), T1.W, +; CM-NEXT: MUL_IEEE T0.Y, PV.X, literal.x, +; CM-NEXT: ADD_INT T0.Z, T1.Z, 
literal.y, +; CM-NEXT: MAX_INT * T1.W, T0.W, literal.z, +; CM-NEXT: 2130706432(1.701412e+38), -254(nan) +; CM-NEXT: -330(nan), 0(0.000000e+00) +; CM-NEXT: ADD_INT T1.X, T0.W, literal.x, +; CM-NEXT: ADD_INT T1.Y, PV.W, literal.y, +; CM-NEXT: ADD_INT T1.Z, T0.W, literal.z, +; CM-NEXT: SETGT_UINT * T1.W, T0.W, literal.w, +; CM-NEXT: -127(nan), 204(2.858649e-43) +; CM-NEXT: 102(1.429324e-43), -229(nan) +; CM-NEXT: SETGT_UINT T2.X, T0.W, literal.x, +; CM-NEXT: CNDE_INT T1.Y, PV.W, PV.Y, PV.Z, +; CM-NEXT: SETGT_INT T1.Z, T0.W, literal.y, +; CM-NEXT: MUL_IEEE * T2.W, T0.X, literal.z, +; CM-NEXT: 254(3.559298e-43), -127(nan) +; CM-NEXT: 209715200(1.972152e-31), 0(0.000000e+00) +; CM-NEXT: MUL_IEEE T3.X, PV.W, literal.x, +; CM-NEXT: CNDE_INT T1.Y, PV.Z, PV.Y, T0.W, +; CM-NEXT: CNDE_INT T0.Z, PV.X, T1.X, T0.Z, +; CM-NEXT: SETGT_INT * T0.W, T0.W, literal.y, +; CM-NEXT: 209715200(1.972152e-31), 127(1.779649e-43) +; CM-NEXT: CNDE_INT T1.Y, PV.W, PV.Y, PV.Z, +; CM-NEXT: CNDE_INT T0.Z, T1.W, PV.X, T2.W, +; CM-NEXT: MUL_IEEE * T1.W, T0.Y, literal.x, +; CM-NEXT: 2130706432(1.701412e+38), 0(0.000000e+00) +; CM-NEXT: CNDE_INT T0.Y, T2.X, T0.Y, PV.W, +; CM-NEXT: CNDE_INT T0.Z, T1.Z, PV.Z, T0.X, +; CM-NEXT: LSHL * T1.W, PV.Y, literal.x, +; CM-NEXT: 23(3.222986e-44), 0(0.000000e+00) +; CM-NEXT: ADD_INT T1.Z, PV.W, literal.x, +; CM-NEXT: CNDE_INT * T0.W, T0.W, PV.Z, PV.Y, +; CM-NEXT: 1065353216(1.000000e+00), 0(0.000000e+00) +; CM-NEXT: MUL_IEEE T0.Z, PV.W, PV.Z, +; CM-NEXT: SETGT * T0.W, literal.x, KC0[2].Z, +; CM-NEXT: -1026650416(-1.032789e+02), 0(0.000000e+00) +; CM-NEXT: CNDE T0.Z, PV.W, PV.Z, 0.0, +; CM-NEXT: SETGT * T0.W, KC0[2].Z, literal.x, +; CM-NEXT: 1118925336(8.872284e+01), 0(0.000000e+00) +; CM-NEXT: CNDE * T0.X, PV.W, PV.Z, literal.x, +; CM-NEXT: 2139095040(INF), 0(0.000000e+00) ; CM-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, ; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00) %result = call float @llvm.exp.f32(float %in) @@ -188,232 +340,524 @@ define amdgpu_kernel void 
@s_exp_v2f32(ptr addrspace(1) %out, <2 x float> %in) { ; VI-SDAG-LABEL: s_exp_v2f32: ; VI-SDAG: ; %bb.0: -; VI-SDAG-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 -; VI-SDAG-NEXT: v_mov_b32_e32 v0, 0x3fb8aa3b -; VI-SDAG-NEXT: s_mov_b32 s0, 0xc2fc0000 -; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x42800000 +; VI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 +; VI-SDAG-NEXT: v_mov_b32_e32 v0, 0x3fb8a000 ; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0) -; VI-SDAG-NEXT: v_mul_f32_e32 v2, s7, v0 -; VI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s0, v2 -; VI-SDAG-NEXT: v_mul_f32_e32 v0, s6, v0 -; VI-SDAG-NEXT: v_cndmask_b32_e32 v3, 0, v1, vcc -; VI-SDAG-NEXT: v_cmp_gt_f32_e64 s[0:1], s0, v0 -; VI-SDAG-NEXT: v_add_f32_e32 v2, v2, v3 -; VI-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, v1, s[0:1] -; VI-SDAG-NEXT: v_exp_f32_e32 v2, v2 -; VI-SDAG-NEXT: v_add_f32_e32 v0, v0, v1 -; VI-SDAG-NEXT: v_exp_f32_e32 v0, v0 -; VI-SDAG-NEXT: v_mov_b32_e32 v3, 0x1f800000 -; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v3, vcc -; VI-SDAG-NEXT: v_mul_f32_e32 v1, v2, v1 -; VI-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, v3, s[0:1] -; VI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2 +; VI-SDAG-NEXT: s_and_b32 s4, s3, 0xfffff000 ; VI-SDAG-NEXT: v_mov_b32_e32 v2, s4 -; VI-SDAG-NEXT: v_mov_b32_e32 v3, s5 +; VI-SDAG-NEXT: v_sub_f32_e32 v2, s3, v2 +; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x39a3b295, v2 +; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v2 +; VI-SDAG-NEXT: v_add_f32_e32 v2, v2, v4 +; VI-SDAG-NEXT: v_mov_b32_e32 v4, 0x39a3b295 +; VI-SDAG-NEXT: v_mul_f32_e32 v1, s4, v0 +; VI-SDAG-NEXT: v_mul_f32_e32 v5, s4, v4 +; VI-SDAG-NEXT: s_and_b32 s4, s2, 0xfffff000 +; VI-SDAG-NEXT: v_rndne_f32_e32 v3, v1 +; VI-SDAG-NEXT: v_mov_b32_e32 v6, s4 +; VI-SDAG-NEXT: v_sub_f32_e32 v1, v1, v3 +; VI-SDAG-NEXT: v_add_f32_e32 v2, v5, v2 +; VI-SDAG-NEXT: v_sub_f32_e32 v6, s2, v6 +; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 +; VI-SDAG-NEXT: v_mul_f32_e32 v0, s4, v0 +; VI-SDAG-NEXT: v_mul_f32_e32 v7, 0x39a3b295, v6 +; VI-SDAG-NEXT: v_mul_f32_e32 v6, 0x3fb8a000, v6 +; VI-SDAG-NEXT: 
v_exp_f32_e32 v1, v1 +; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v3 +; VI-SDAG-NEXT: v_rndne_f32_e32 v5, v0 +; VI-SDAG-NEXT: v_add_f32_e32 v6, v6, v7 +; VI-SDAG-NEXT: v_mul_f32_e32 v4, s4, v4 +; VI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v5 +; VI-SDAG-NEXT: v_add_f32_e32 v4, v4, v6 +; VI-SDAG-NEXT: v_add_f32_e32 v0, v0, v4 +; VI-SDAG-NEXT: v_exp_f32_e32 v0, v0 +; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v4, v5 +; VI-SDAG-NEXT: v_ldexp_f32 v1, v1, v2 +; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 +; VI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s3, v2 +; VI-SDAG-NEXT: v_mov_b32_e32 v3, 0x42b17218 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; VI-SDAG-NEXT: v_mov_b32_e32 v5, 0x7f800000 +; VI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s3, v3 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc +; VI-SDAG-NEXT: v_ldexp_f32 v0, v0, v4 +; VI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s2, v2 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc +; VI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s2, v3 +; VI-SDAG-NEXT: v_mov_b32_e32 v3, s1 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, v5, v0, vcc +; VI-SDAG-NEXT: v_mov_b32_e32 v2, s0 ; VI-SDAG-NEXT: flat_store_dwordx2 v[2:3], v[0:1] ; VI-SDAG-NEXT: s_endpgm ; ; VI-GISEL-LABEL: s_exp_v2f32: ; VI-GISEL: ; %bb.0: -; VI-GISEL-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 -; VI-GISEL-NEXT: s_mov_b32 s0, 0xc2fc0000 -; VI-GISEL-NEXT: v_mov_b32_e32 v0, 0x42800000 -; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x1f800000 +; VI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 +; VI-GISEL-NEXT: v_mov_b32_e32 v0, 0x3fb8a000 +; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x39a3b295 ; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0) -; VI-GISEL-NEXT: v_mov_b32_e32 v2, s6 -; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8aa3b, v2 -; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s0, v2 -; VI-GISEL-NEXT: v_cndmask_b32_e32 v3, 0, v0, vcc -; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v3 -; VI-GISEL-NEXT: v_mov_b32_e32 v3, s7 -; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3fb8aa3b, v3 -; VI-GISEL-NEXT: v_cmp_gt_f32_e64 s[0:1], s0, v3 -; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 
v0, s[0:1] -; VI-GISEL-NEXT: v_add_f32_e32 v0, v3, v0 -; VI-GISEL-NEXT: v_exp_f32_e32 v2, v2 -; VI-GISEL-NEXT: v_exp_f32_e32 v3, v0 -; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, 1.0, v1, vcc -; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, v1, s[0:1] -; VI-GISEL-NEXT: v_mul_f32_e32 v0, v2, v0 -; VI-GISEL-NEXT: v_mul_f32_e32 v1, v3, v1 +; VI-GISEL-NEXT: s_and_b32 s4, s2, 0xfffff000 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, s4 -; VI-GISEL-NEXT: v_mov_b32_e32 v3, s5 +; VI-GISEL-NEXT: v_sub_f32_e32 v2, s2, v2 +; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x39a3b295, v2 +; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v2 +; VI-GISEL-NEXT: v_mul_f32_e32 v3, s4, v0 +; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v4 +; VI-GISEL-NEXT: v_mul_f32_e32 v4, s4, v1 +; VI-GISEL-NEXT: s_and_b32 s4, s3, 0xfffff000 +; VI-GISEL-NEXT: v_mov_b32_e32 v5, s4 +; VI-GISEL-NEXT: v_add_f32_e32 v2, v4, v2 +; VI-GISEL-NEXT: v_rndne_f32_e32 v4, v3 +; VI-GISEL-NEXT: v_sub_f32_e32 v5, s3, v5 +; VI-GISEL-NEXT: v_sub_f32_e32 v3, v3, v4 +; VI-GISEL-NEXT: v_mul_f32_e32 v6, 0x39a3b295, v5 +; VI-GISEL-NEXT: v_mul_f32_e32 v5, 0x3fb8a000, v5 +; VI-GISEL-NEXT: v_add_f32_e32 v2, v3, v2 +; VI-GISEL-NEXT: v_mul_f32_e32 v0, s4, v0 +; VI-GISEL-NEXT: v_add_f32_e32 v5, v5, v6 +; VI-GISEL-NEXT: v_mul_f32_e32 v1, s4, v1 +; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v3, v4 +; VI-GISEL-NEXT: v_exp_f32_e32 v2, v2 +; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v5 +; VI-GISEL-NEXT: v_rndne_f32_e32 v5, v0 +; VI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v5 +; VI-GISEL-NEXT: v_add_f32_e32 v0, v0, v1 +; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v1, v5 +; VI-GISEL-NEXT: v_exp_f32_e32 v5, v0 +; VI-GISEL-NEXT: v_ldexp_f32 v2, v2, v3 +; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0xc2ce8ed0 +; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s2, v3 +; VI-GISEL-NEXT: v_mov_b32_e32 v4, 0x42b17218 +; VI-GISEL-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc +; VI-GISEL-NEXT: v_mov_b32_e32 v6, 0x7f800000 +; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s2, v4 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v6, vcc +; VI-GISEL-NEXT: v_ldexp_f32 
v1, v5, v1 +; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s3, v3 +; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc +; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s3, v4 +; VI-GISEL-NEXT: v_mov_b32_e32 v3, s1 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v6, vcc +; VI-GISEL-NEXT: v_mov_b32_e32 v2, s0 ; VI-GISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1] ; VI-GISEL-NEXT: s_endpgm ; ; GFX900-SDAG-LABEL: s_exp_v2f32: ; GFX900-SDAG: ; %bb.0: -; GFX900-SDAG-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 +; GFX900-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 ; GFX900-SDAG-NEXT: v_mov_b32_e32 v0, 0x3fb8aa3b -; GFX900-SDAG-NEXT: s_mov_b32 s0, 0xc2fc0000 -; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x42800000 -; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0 +; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x32a5705f +; GFX900-SDAG-NEXT: v_mov_b32_e32 v5, 0xc2ce8ed0 ; GFX900-SDAG-NEXT: s_waitcnt lgkmcnt(0) -; GFX900-SDAG-NEXT: v_mul_f32_e32 v3, s7, v0 -; GFX900-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s0, v3 -; GFX900-SDAG-NEXT: v_mul_f32_e32 v0, s6, v0 -; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v4, 0, v1, vcc -; GFX900-SDAG-NEXT: v_cmp_gt_f32_e64 s[0:1], s0, v0 -; GFX900-SDAG-NEXT: v_add_f32_e32 v3, v3, v4 -; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, v1, s[0:1] -; GFX900-SDAG-NEXT: v_exp_f32_e32 v3, v3 -; GFX900-SDAG-NEXT: v_add_f32_e32 v0, v0, v1 +; GFX900-SDAG-NEXT: v_mul_f32_e32 v2, s3, v0 +; GFX900-SDAG-NEXT: v_rndne_f32_e32 v3, v2 +; GFX900-SDAG-NEXT: v_fma_f32 v4, s3, v0, -v2 +; GFX900-SDAG-NEXT: v_sub_f32_e32 v2, v2, v3 +; GFX900-SDAG-NEXT: v_fma_f32 v4, s3, v1, v4 +; GFX900-SDAG-NEXT: v_mul_f32_e32 v6, s2, v0 +; GFX900-SDAG-NEXT: v_add_f32_e32 v2, v2, v4 +; GFX900-SDAG-NEXT: v_rndne_f32_e32 v7, v6 +; GFX900-SDAG-NEXT: v_fma_f32 v0, s2, v0, -v6 +; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v3, v3 +; GFX900-SDAG-NEXT: v_exp_f32_e32 v2, v2 +; GFX900-SDAG-NEXT: v_sub_f32_e32 v8, v6, v7 +; GFX900-SDAG-NEXT: v_fma_f32 v0, s2, v1, v0 +; GFX900-SDAG-NEXT: v_add_f32_e32 v0, v8, v0 ; GFX900-SDAG-NEXT: v_exp_f32_e32 v0, v0 -; 
GFX900-SDAG-NEXT: v_mov_b32_e32 v4, 0x1f800000 -; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v4, vcc -; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, v3, v1 -; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v3, 1.0, v4, s[0:1] -; GFX900-SDAG-NEXT: v_mul_f32_e32 v0, v0, v3 -; GFX900-SDAG-NEXT: global_store_dwordx2 v2, v[0:1], s[4:5] +; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v6, v7 +; GFX900-SDAG-NEXT: v_ldexp_f32 v2, v2, v3 +; GFX900-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s3, v5 +; GFX900-SDAG-NEXT: v_mov_b32_e32 v3, 0x42b17218 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc +; GFX900-SDAG-NEXT: v_mov_b32_e32 v7, 0x7f800000 +; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s3, v3 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, v7, v2, vcc +; GFX900-SDAG-NEXT: v_ldexp_f32 v0, v0, v6 +; GFX900-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s2, v5 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc +; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s2, v3 +; GFX900-SDAG-NEXT: v_mov_b32_e32 v4, 0 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, v7, v0, vcc +; GFX900-SDAG-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] ; GFX900-SDAG-NEXT: s_endpgm ; ; GFX900-GISEL-LABEL: s_exp_v2f32: ; GFX900-GISEL: ; %bb.0: -; GFX900-GISEL-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 -; GFX900-GISEL-NEXT: s_mov_b32 s0, 0xc2fc0000 -; GFX900-GISEL-NEXT: v_mov_b32_e32 v0, 0x42800000 -; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x1f800000 +; GFX900-GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 +; GFX900-GISEL-NEXT: s_mov_b32 s4, 0x3fb8aa3b +; GFX900-GISEL-NEXT: v_mov_b32_e32 v0, 0x32a5705f +; GFX900-GISEL-NEXT: v_mov_b32_e32 v4, 0x7f800000 +; GFX900-GISEL-NEXT: s_mov_b32 s5, 0x42b17218 ; GFX900-GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, s6 -; GFX900-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8aa3b, v2 -; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s0, v2 -; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v3, 0, v0, vcc -; GFX900-GISEL-NEXT: v_add_f32_e32 v2, v2, v3 -; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, s7 -; GFX900-GISEL-NEXT: v_mul_f32_e32 
v3, 0x3fb8aa3b, v3 -; GFX900-GISEL-NEXT: v_cmp_gt_f32_e64 s[0:1], s0, v3 -; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, v0, s[0:1] -; GFX900-GISEL-NEXT: v_add_f32_e32 v0, v3, v0 -; GFX900-GISEL-NEXT: v_exp_f32_e32 v2, v2 -; GFX900-GISEL-NEXT: v_exp_f32_e32 v3, v0 -; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, 1.0, v1, vcc -; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, v1, s[0:1] -; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, v2, v0 -; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, v3, v1 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, s2 +; GFX900-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8aa3b, v1 +; GFX900-GISEL-NEXT: v_fma_f32 v1, v1, s4, -v2 +; GFX900-GISEL-NEXT: v_rndne_f32_e32 v3, v2 +; GFX900-GISEL-NEXT: v_fma_f32 v1, s2, v0, v1 +; GFX900-GISEL-NEXT: v_sub_f32_e32 v2, v2, v3 +; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v2, v1 +; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x3fb8aa3b +; GFX900-GISEL-NEXT: v_mul_f32_e32 v5, s3, v3 +; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1 +; GFX900-GISEL-NEXT: v_fma_f32 v3, s3, v3, -v5 +; GFX900-GISEL-NEXT: v_fma_f32 v0, s3, v0, v3 +; GFX900-GISEL-NEXT: v_rndne_f32_e32 v3, v5 +; GFX900-GISEL-NEXT: v_sub_f32_e32 v5, v5, v3 +; GFX900-GISEL-NEXT: v_add_f32_e32 v0, v5, v0 +; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 +; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v3, v3 +; GFX900-GISEL-NEXT: v_exp_f32_e32 v5, v0 +; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s2, v2 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218 +; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc +; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s2, v2 +; GFX900-GISEL-NEXT: s_mov_b32 s4, 0xc2ce8ed0 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v4, vcc +; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, s3 +; GFX900-GISEL-NEXT: v_ldexp_f32 v2, v5, v3 +; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s4, v1 +; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc +; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s5, v1 +; GFX900-GISEL-NEXT: 
v_cndmask_b32_e32 v1, v2, v4, vcc ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0 -; GFX900-GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[4:5] +; GFX900-GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] ; GFX900-GISEL-NEXT: s_endpgm ; ; SI-SDAG-LABEL: s_exp_v2f32: ; SI-SDAG: ; %bb.0: ; SI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 ; SI-SDAG-NEXT: v_mov_b32_e32 v0, 0x3fb8aa3b -; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x42800000 +; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x32a5705f ; SI-SDAG-NEXT: s_mov_b32 s7, 0xf000 ; SI-SDAG-NEXT: s_mov_b32 s6, -1 ; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; SI-SDAG-NEXT: v_mul_f32_e32 v2, s3, v0 +; SI-SDAG-NEXT: v_rndne_f32_e32 v3, v2 +; SI-SDAG-NEXT: v_fma_f32 v4, s3, v0, -v2 +; SI-SDAG-NEXT: v_sub_f32_e32 v2, v2, v3 +; SI-SDAG-NEXT: v_fma_f32 v4, s3, v1, v4 +; SI-SDAG-NEXT: v_add_f32_e32 v2, v2, v4 +; SI-SDAG-NEXT: v_mul_f32_e32 v5, s2, v0 +; SI-SDAG-NEXT: v_exp_f32_e32 v2, v2 +; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v3, v3 +; SI-SDAG-NEXT: v_rndne_f32_e32 v6, v5 +; SI-SDAG-NEXT: v_fma_f32 v0, s2, v0, -v5 +; SI-SDAG-NEXT: v_sub_f32_e32 v7, v5, v6 +; SI-SDAG-NEXT: v_fma_f32 v0, s2, v1, v0 +; SI-SDAG-NEXT: v_add_f32_e32 v0, v7, v0 +; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0 +; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v5, v6 +; SI-SDAG-NEXT: v_ldexp_f32_e32 v2, v2, v3 +; SI-SDAG-NEXT: v_mov_b32_e32 v3, 0xc2ce8ed0 +; SI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s3, v3 +; SI-SDAG-NEXT: v_mov_b32_e32 v4, 0x42b17218 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc +; SI-SDAG-NEXT: v_mov_b32_e32 v6, 0x7f800000 +; SI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s3, v4 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, v6, v2, vcc +; SI-SDAG-NEXT: v_ldexp_f32_e32 v0, v0, v5 +; SI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s2, v3 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc +; SI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s2, v4 ; SI-SDAG-NEXT: s_mov_b32 s4, s0 -; SI-SDAG-NEXT: v_mul_f32_e32 v1, s3, v0 -; SI-SDAG-NEXT: s_mov_b32 s0, 0xc2fc0000 -; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s0, v1 -; SI-SDAG-NEXT: v_mul_f32_e32 
v0, s2, v0 ; SI-SDAG-NEXT: s_mov_b32 s5, s1 -; SI-SDAG-NEXT: v_cndmask_b32_e32 v3, 0, v2, vcc -; SI-SDAG-NEXT: v_cmp_gt_f32_e64 s[0:1], s0, v0 -; SI-SDAG-NEXT: v_add_f32_e32 v1, v1, v3 -; SI-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, v2, s[0:1] -; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1 -; SI-SDAG-NEXT: v_add_f32_e32 v0, v0, v2 -; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0 -; SI-SDAG-NEXT: v_mov_b32_e32 v3, 0x1f800000 -; SI-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v3, vcc -; SI-SDAG-NEXT: v_mul_f32_e32 v1, v1, v2 -; SI-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, v3, s[0:1] -; SI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v6, v0, vcc ; SI-SDAG-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 ; SI-SDAG-NEXT: s_endpgm ; ; SI-GISEL-LABEL: s_exp_v2f32: ; SI-GISEL: ; %bb.0: -; SI-GISEL-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 -; SI-GISEL-NEXT: s_mov_b32 s0, 0xc2fc0000 -; SI-GISEL-NEXT: v_mov_b32_e32 v0, 0x42800000 -; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x1f800000 +; SI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SI-GISEL-NEXT: s_mov_b32 s4, 0x3fb8aa3b +; SI-GISEL-NEXT: v_mov_b32_e32 v0, 0x32a5705f +; SI-GISEL-NEXT: v_mov_b32_e32 v4, 0x7f800000 +; SI-GISEL-NEXT: s_mov_b32 s5, 0x42b17218 ; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0) -; SI-GISEL-NEXT: v_mov_b32_e32 v2, s6 -; SI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8aa3b, v2 -; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s0, v2 -; SI-GISEL-NEXT: v_cndmask_b32_e32 v3, 0, v0, vcc -; SI-GISEL-NEXT: v_add_f32_e32 v2, v2, v3 -; SI-GISEL-NEXT: v_mov_b32_e32 v3, s7 -; SI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3fb8aa3b, v3 -; SI-GISEL-NEXT: v_cmp_gt_f32_e64 s[0:1], s0, v3 -; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, v0, s[0:1] -; SI-GISEL-NEXT: v_add_f32_e32 v0, v3, v0 -; SI-GISEL-NEXT: v_exp_f32_e32 v2, v2 -; SI-GISEL-NEXT: v_exp_f32_e32 v3, v0 -; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, 1.0, v1, vcc -; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, v1, s[0:1] -; SI-GISEL-NEXT: v_mul_f32_e32 v0, v2, v0 -; SI-GISEL-NEXT: v_mul_f32_e32 v1, v3, v1 -; 
SI-GISEL-NEXT: s_mov_b32 s6, -1 -; SI-GISEL-NEXT: s_mov_b32 s7, 0xf000 -; SI-GISEL-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 +; SI-GISEL-NEXT: v_mov_b32_e32 v1, s2 +; SI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8aa3b, v1 +; SI-GISEL-NEXT: v_fma_f32 v1, v1, s4, -v2 +; SI-GISEL-NEXT: v_rndne_f32_e32 v3, v2 +; SI-GISEL-NEXT: v_fma_f32 v1, s2, v0, v1 +; SI-GISEL-NEXT: v_sub_f32_e32 v2, v2, v3 +; SI-GISEL-NEXT: v_add_f32_e32 v1, v2, v1 +; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3 +; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x3fb8aa3b +; SI-GISEL-NEXT: v_mul_f32_e32 v5, s3, v3 +; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1 +; SI-GISEL-NEXT: v_fma_f32 v3, s3, v3, -v5 +; SI-GISEL-NEXT: v_fma_f32 v0, s3, v0, v3 +; SI-GISEL-NEXT: v_rndne_f32_e32 v3, v5 +; SI-GISEL-NEXT: v_sub_f32_e32 v5, v5, v3 +; SI-GISEL-NEXT: v_add_f32_e32 v0, v5, v0 +; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2 +; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 +; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v3, v3 +; SI-GISEL-NEXT: v_exp_f32_e32 v5, v0 +; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s2, v2 +; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218 +; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc +; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s2, v2 +; SI-GISEL-NEXT: s_mov_b32 s4, 0xc2ce8ed0 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v4, vcc +; SI-GISEL-NEXT: v_mov_b32_e32 v1, s3 +; SI-GISEL-NEXT: v_ldexp_f32_e32 v2, v5, v3 +; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s4, v1 +; SI-GISEL-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc +; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s5, v1 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, v2, v4, vcc +; SI-GISEL-NEXT: s_mov_b32 s2, -1 +; SI-GISEL-NEXT: s_mov_b32 s3, 0xf000 +; SI-GISEL-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 ; SI-GISEL-NEXT: s_endpgm ; ; R600-LABEL: s_exp_v2f32: ; R600: ; %bb.0: -; R600-NEXT: ALU 21, @4, KC0[CB0:0-32], KC1[] -; R600-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1 +; R600-NEXT: ALU 96, @4, KC0[CB0:0-32], KC1[] +; R600-NEXT: ALU 12, @101, KC0[CB0:0-32], KC1[] +; R600-NEXT: 
MEM_RAT_CACHELESS STORE_RAW T1.XY, T0.X, 1 ; R600-NEXT: CF_END -; R600-NEXT: PAD ; R600-NEXT: ALU clause starting at 4: -; R600-NEXT: MUL_IEEE * T0.W, KC0[3].X, literal.x, -; R600-NEXT: 1069066811(1.442695e+00), 0(0.000000e+00) -; R600-NEXT: SETGT T1.W, literal.x, PV.W, -; R600-NEXT: MUL_IEEE * T2.W, KC0[2].W, literal.y, -; R600-NEXT: -1023672320(-1.260000e+02), 1069066811(1.442695e+00) -; R600-NEXT: SETGT T3.W, literal.x, PS, -; R600-NEXT: CNDE * T4.W, PV.W, 0.0, literal.y, -; R600-NEXT: -1023672320(-1.260000e+02), 1115684864(6.400000e+01) -; R600-NEXT: ADD T0.W, T0.W, PS, -; R600-NEXT: CNDE * T4.W, PV.W, 0.0, literal.x, -; R600-NEXT: 1115684864(6.400000e+01), 0(0.000000e+00) -; R600-NEXT: ADD T0.Z, T2.W, PS, -; R600-NEXT: CNDE T1.W, T1.W, 1.0, literal.x, BS:VEC_120/SCL_212 -; R600-NEXT: EXP_IEEE * T0.X, PV.W, -; R600-NEXT: 528482304(5.421011e-20), 0(0.000000e+00) -; R600-NEXT: MUL_IEEE T0.Y, PS, PV.W, -; R600-NEXT: CNDE T0.W, T3.W, 1.0, literal.x, +; R600-NEXT: AND_INT * T0.W, KC0[3].X, literal.x, +; R600-NEXT: -4096(nan), 0(0.000000e+00) +; R600-NEXT: ADD * T1.W, KC0[3].X, -PV.W, +; R600-NEXT: AND_INT T0.Z, KC0[2].W, literal.x, +; R600-NEXT: MUL_IEEE T2.W, PV.W, literal.y, +; R600-NEXT: MUL_IEEE * T3.W, T0.W, literal.z, +; R600-NEXT: -4096(nan), 967029397(3.122284e-04) +; R600-NEXT: 1069064192(1.442383e+00), 0(0.000000e+00) +; R600-NEXT: RNDNE T1.Z, PS, +; R600-NEXT: MULADD_IEEE T1.W, T1.W, literal.x, PV.W, +; R600-NEXT: ADD * T2.W, KC0[2].W, -PV.Z, +; R600-NEXT: 1069064192(1.442383e+00), 0(0.000000e+00) +; R600-NEXT: MUL_IEEE T0.Y, PS, literal.x, +; R600-NEXT: MUL_IEEE T2.Z, T0.Z, literal.y, +; R600-NEXT: MULADD_IEEE T0.W, T0.W, literal.x, PV.W, +; R600-NEXT: ADD * T1.W, T3.W, -PV.Z, +; R600-NEXT: 967029397(3.122284e-04), 1069064192(1.442383e+00) +; R600-NEXT: ADD T3.Z, PS, PV.W, +; R600-NEXT: RNDNE T0.W, PV.Z, +; R600-NEXT: MULADD_IEEE * T1.W, T2.W, literal.x, PV.Y, BS:VEC_021/SCL_122 +; R600-NEXT: 1069064192(1.442383e+00), 0(0.000000e+00) +; R600-NEXT: TRUNC 
T0.Y, T1.Z, +; R600-NEXT: MULADD_IEEE T0.Z, T0.Z, literal.x, PS, BS:VEC_120/SCL_212 +; R600-NEXT: ADD T1.W, T2.Z, -PV.W, BS:VEC_201 ; R600-NEXT: EXP_IEEE * T0.X, PV.Z, -; R600-NEXT: 528482304(5.421011e-20), 0(0.000000e+00) -; R600-NEXT: MUL_IEEE T0.X, PS, PV.W, -; R600-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, -; R600-NEXT: 2(2.802597e-45), 0(0.000000e+00) +; R600-NEXT: 967029397(3.122284e-04), 0(0.000000e+00) +; R600-NEXT: ADD T0.Z, PV.W, PV.Z, +; R600-NEXT: FLT_TO_INT T1.W, PV.Y, +; R600-NEXT: MUL_IEEE * T2.W, PS, literal.x, +; R600-NEXT: 2130706432(1.701412e+38), 0(0.000000e+00) +; R600-NEXT: MUL_IEEE T1.Z, PS, literal.x, +; R600-NEXT: SETGT_UINT T3.W, PV.W, literal.y, +; R600-NEXT: EXP_IEEE * T0.Y, PV.Z, +; R600-NEXT: 2130706432(1.701412e+38), 254(3.559298e-43) +; R600-NEXT: CNDE_INT T1.X, PV.W, T2.W, PV.Z, +; R600-NEXT: MUL_IEEE T1.Y, PS, literal.x, +; R600-NEXT: MAX_INT T0.Z, T1.W, literal.y, +; R600-NEXT: MIN_INT T2.W, T1.W, literal.z, +; R600-NEXT: TRUNC * T0.W, T0.W, +; R600-NEXT: 2130706432(1.701412e+38), -330(nan) +; R600-NEXT: 381(5.338947e-43), 0(0.000000e+00) +; R600-NEXT: FLT_TO_INT T2.X, PS, +; R600-NEXT: ADD_INT T2.Y, PV.W, literal.x, +; R600-NEXT: ADD_INT T0.Z, PV.Z, literal.y, +; R600-NEXT: ADD_INT T0.W, T1.W, literal.z, +; R600-NEXT: SETGT_UINT * T2.W, T1.W, literal.w, +; R600-NEXT: -254(nan), 204(2.858649e-43) +; R600-NEXT: 102(1.429324e-43), -229(nan) +; R600-NEXT: ADD_INT T3.X, T1.W, literal.x, +; R600-NEXT: CNDE_INT T3.Y, PS, PV.Z, PV.W, +; R600-NEXT: SETGT_INT T0.Z, T1.W, literal.x, +; R600-NEXT: MUL_IEEE T0.W, T0.X, literal.y, +; R600-NEXT: MUL_IEEE * T4.W, T0.Y, literal.y, +; R600-NEXT: -127(nan), 209715200(1.972152e-31) +; R600-NEXT: MUL_IEEE T4.X, PS, literal.x, +; R600-NEXT: MUL_IEEE T4.Y, PV.W, literal.x, +; R600-NEXT: CNDE_INT T1.Z, PV.Z, PV.Y, T1.W, +; R600-NEXT: CNDE_INT T3.W, T3.W, PV.X, T2.Y, +; R600-NEXT: MAX_INT * T5.W, T2.X, literal.y, +; R600-NEXT: 209715200(1.972152e-31), -330(nan) +; R600-NEXT: SETGT_INT T3.X, T1.W, 
literal.x, +; R600-NEXT: ADD_INT T2.Y, PS, literal.y, +; R600-NEXT: ADD_INT T2.Z, T2.X, literal.z, +; R600-NEXT: SETGT_UINT * T1.W, T2.X, literal.w, +; R600-NEXT: 127(1.779649e-43), 204(2.858649e-43) +; R600-NEXT: 102(1.429324e-43), -229(nan) +; R600-NEXT: MIN_INT * T5.W, T2.X, literal.x, +; R600-NEXT: 381(5.338947e-43), 0(0.000000e+00) +; R600-NEXT: ADD_INT T5.X, PV.W, literal.x, +; R600-NEXT: ADD_INT T3.Y, T2.X, literal.y, +; R600-NEXT: SETGT_UINT T3.Z, T2.X, literal.z, +; R600-NEXT: CNDE_INT T5.W, T1.W, T2.Y, T2.Z, +; R600-NEXT: SETGT_INT * T6.W, T2.X, literal.y, +; R600-NEXT: -254(nan), -127(nan) +; R600-NEXT: 254(3.559298e-43), 0(0.000000e+00) +; R600-NEXT: CNDE_INT T6.X, PS, PV.W, T2.X, +; R600-NEXT: CNDE_INT T2.Y, PV.Z, PV.Y, PV.X, +; R600-NEXT: SETGT_INT T2.Z, T2.X, literal.x, BS:VEC_120/SCL_212 +; R600-NEXT: CNDE_INT T3.W, T3.X, T1.Z, T3.W, BS:VEC_021/SCL_122 +; R600-NEXT: CNDE_INT * T0.W, T2.W, T4.Y, T0.W, +; R600-NEXT: 127(1.779649e-43), 0(0.000000e+00) +; R600-NEXT: CNDE_INT T0.X, T0.Z, PS, T0.X, +; R600-NEXT: LSHL T3.Y, PV.W, literal.x, +; R600-NEXT: CNDE_INT T0.Z, PV.Z, PV.X, PV.Y, +; R600-NEXT: CNDE_INT T0.W, T1.W, T4.X, T4.W, +; R600-NEXT: MUL_IEEE * T1.W, T1.Y, literal.y, +; R600-NEXT: 23(3.222986e-44), 2130706432(1.701412e+38) +; R600-NEXT: CNDE_INT T2.X, T3.Z, T1.Y, PS, +; R600-NEXT: CNDE_INT T0.Y, T6.W, PV.W, T0.Y, +; R600-NEXT: LSHL T0.Z, PV.Z, literal.x, +; R600-NEXT: ADD_INT T0.W, PV.Y, literal.y, +; R600-NEXT: CNDE_INT * T1.W, T3.X, PV.X, T1.X, +; R600-NEXT: 23(3.222986e-44), 1065353216(1.000000e+00) +; R600-NEXT: MUL_IEEE T1.Y, PS, PV.W, +; R600-NEXT: SETGT T1.Z, literal.x, KC0[3].X, +; R600-NEXT: ADD_INT * T0.W, PV.Z, literal.y, +; R600-NEXT: -1026650416(-1.032789e+02), 1065353216(1.000000e+00) +; R600-NEXT: ALU clause starting at 101: +; R600-NEXT: CNDE_INT * T1.W, T2.Z, T0.Y, T2.X, +; R600-NEXT: MUL_IEEE T0.Y, PV.W, T0.W, +; R600-NEXT: SETGT T0.Z, literal.x, KC0[2].W, +; R600-NEXT: CNDE T0.W, T1.Z, T1.Y, 0.0, +; R600-NEXT: SETGT * T1.W, 
KC0[3].X, literal.y, +; R600-NEXT: -1026650416(-1.032789e+02), 1118925336(8.872284e+01) +; R600-NEXT: CNDE T1.Y, PS, PV.W, literal.x, +; R600-NEXT: CNDE T0.W, PV.Z, PV.Y, 0.0, +; R600-NEXT: SETGT * T1.W, KC0[2].W, literal.y, +; R600-NEXT: 2139095040(INF), 1118925336(8.872284e+01) +; R600-NEXT: CNDE T1.X, PS, PV.W, literal.x, +; R600-NEXT: LSHR * T0.X, KC0[2].Y, literal.y, +; R600-NEXT: 2139095040(INF), 2(2.802597e-45) ; ; CM-LABEL: s_exp_v2f32: ; CM: ; %bb.0: -; CM-NEXT: ALU 26, @4, KC0[CB0:0-32], KC1[] -; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T1, T0.X +; CM-NEXT: ALU 100, @4, KC0[CB0:0-32], KC1[] +; CM-NEXT: ALU 18, @105, KC0[CB0:0-32], KC1[] +; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T0, T1.X ; CM-NEXT: CF_END -; CM-NEXT: PAD ; CM-NEXT: ALU clause starting at 4: -; CM-NEXT: MUL_IEEE * T0.W, KC0[3].X, literal.x, -; CM-NEXT: 1069066811(1.442695e+00), 0(0.000000e+00) -; CM-NEXT: MUL_IEEE T0.Z, KC0[2].W, literal.x, -; CM-NEXT: SETGT * T1.W, literal.y, PV.W, -; CM-NEXT: 1069066811(1.442695e+00), -1023672320(-1.260000e+02) -; CM-NEXT: CNDE T1.Z, PV.W, 0.0, literal.x, -; CM-NEXT: SETGT * T2.W, literal.y, PV.Z, -; CM-NEXT: 1115684864(6.400000e+01), -1023672320(-1.260000e+02) -; CM-NEXT: CNDE T0.Y, PV.W, 0.0, literal.x, -; CM-NEXT: CNDE T2.Z, T1.W, 1.0, literal.y, -; CM-NEXT: ADD * T0.W, T0.W, PV.Z, BS:VEC_120/SCL_212 -; CM-NEXT: 1115684864(6.400000e+01), 528482304(5.421011e-20) -; CM-NEXT: EXP_IEEE T0.X, T0.W, -; CM-NEXT: EXP_IEEE T0.Y (MASKED), T0.W, -; CM-NEXT: EXP_IEEE T0.Z (MASKED), T0.W, -; CM-NEXT: EXP_IEEE * T0.W (MASKED), T0.W, -; CM-NEXT: MUL_IEEE T1.Y, PV.X, T2.Z, -; CM-NEXT: CNDE T1.Z, T2.W, 1.0, literal.x, -; CM-NEXT: ADD * T0.W, T0.Z, T0.Y, -; CM-NEXT: 528482304(5.421011e-20), 0(0.000000e+00) +; CM-NEXT: AND_INT * T0.W, KC0[2].W, literal.x, +; CM-NEXT: -4096(nan), 0(0.000000e+00) +; CM-NEXT: MUL_IEEE T0.Z, PV.W, literal.x, +; CM-NEXT: ADD * T1.W, KC0[2].W, -PV.W, +; CM-NEXT: 1069064192(1.442383e+00), 0(0.000000e+00) +; CM-NEXT: MUL_IEEE T1.Z, PV.W, 
literal.x, +; CM-NEXT: RNDNE * T2.W, PV.Z, +; CM-NEXT: 967029397(3.122284e-04), 0(0.000000e+00) +; CM-NEXT: TRUNC T0.Y, PV.W, +; CM-NEXT: AND_INT T2.Z, KC0[3].X, literal.x, +; CM-NEXT: MULADD_IEEE * T1.W, T1.W, literal.y, PV.Z, +; CM-NEXT: -4096(nan), 1069064192(1.442383e+00) +; CM-NEXT: MULADD_IEEE T0.X, T0.W, literal.x, PV.W, +; CM-NEXT: MUL_IEEE T1.Y, PV.Z, literal.y, +; CM-NEXT: FLT_TO_INT T1.Z, PV.Y, +; CM-NEXT: ADD * T0.W, KC0[3].X, -PV.Z, +; CM-NEXT: 967029397(3.122284e-04), 1069064192(1.442383e+00) +; CM-NEXT: ADD T1.X, T0.Z, -T2.W, +; CM-NEXT: MUL_IEEE T0.Y, PV.W, literal.x, +; CM-NEXT: MAX_INT T0.Z, PV.Z, literal.y, +; CM-NEXT: RNDNE * T1.W, PV.Y, +; CM-NEXT: 967029397(3.122284e-04), -330(nan) +; CM-NEXT: TRUNC T2.X, PV.W, +; CM-NEXT: ADD_INT T2.Y, PV.Z, literal.x, +; CM-NEXT: MULADD_IEEE T0.Z, T0.W, literal.y, PV.Y, +; CM-NEXT: ADD * T0.W, PV.X, T0.X, +; CM-NEXT: 204(2.858649e-43), 1069064192(1.442383e+00) ; CM-NEXT: EXP_IEEE T0.X, T0.W, ; CM-NEXT: EXP_IEEE T0.Y (MASKED), T0.W, ; CM-NEXT: EXP_IEEE T0.Z (MASKED), T0.W, ; CM-NEXT: EXP_IEEE * T0.W (MASKED), T0.W, -; CM-NEXT: MUL_IEEE * T1.X, PV.X, T1.Z, -; CM-NEXT: LSHR * T0.X, KC0[2].Y, literal.x, +; CM-NEXT: ADD_INT T1.X, T1.Z, literal.x, +; CM-NEXT: MULADD_IEEE T0.Y, T2.Z, literal.y, T0.Z, BS:VEC_102/SCL_221 +; CM-NEXT: ADD T0.Z, T1.Y, -T1.W, +; CM-NEXT: MUL_IEEE * T0.W, PV.X, literal.z, +; CM-NEXT: 102(1.429324e-43), 967029397(3.122284e-04) +; CM-NEXT: 2130706432(1.701412e+38), 0(0.000000e+00) +; CM-NEXT: SETGT_UINT T3.X, T1.Z, literal.x, +; CM-NEXT: MUL_IEEE T1.Y, PV.W, literal.y, +; CM-NEXT: SETGT_UINT T2.Z, T1.Z, literal.z, +; CM-NEXT: ADD * T1.W, PV.Z, PV.Y, +; CM-NEXT: -229(nan), 2130706432(1.701412e+38) +; CM-NEXT: 254(3.559298e-43), 0(0.000000e+00) +; CM-NEXT: EXP_IEEE T0.X (MASKED), T1.W, +; CM-NEXT: EXP_IEEE T0.Y, T1.W, +; CM-NEXT: EXP_IEEE T0.Z (MASKED), T1.W, +; CM-NEXT: EXP_IEEE * T0.W (MASKED), T1.W, +; CM-NEXT: CNDE_INT T4.X, T2.Z, T0.W, T1.Y, +; CM-NEXT: CNDE_INT T1.Y, T3.X, T2.Y, T1.X, 
+; CM-NEXT: FLT_TO_INT T0.Z, T2.X, BS:VEC_120/SCL_212 +; CM-NEXT: MUL_IEEE * T0.W, PV.Y, literal.x, +; CM-NEXT: 2130706432(1.701412e+38), 0(0.000000e+00) +; CM-NEXT: SETGT_INT T1.X, T1.Z, literal.x, +; CM-NEXT: MUL_IEEE T2.Y, T0.X, literal.y, +; CM-NEXT: MUL_IEEE T3.Z, PV.W, literal.z, +; CM-NEXT: SETGT_UINT * T1.W, PV.Z, literal.w, +; CM-NEXT: -127(nan), 209715200(1.972152e-31) +; CM-NEXT: 2130706432(1.701412e+38), 254(3.559298e-43) +; CM-NEXT: CNDE_INT T2.X, PV.W, T0.W, PV.Z, +; CM-NEXT: MUL_IEEE T3.Y, PV.Y, literal.x, +; CM-NEXT: CNDE_INT T3.Z, PV.X, T1.Y, T1.Z, +; CM-NEXT: MAX_INT * T0.W, T0.Z, literal.y, +; CM-NEXT: 209715200(1.972152e-31), -330(nan) +; CM-NEXT: ADD_INT T5.X, PV.W, literal.x, +; CM-NEXT: ADD_INT T1.Y, T0.Z, literal.y, +; CM-NEXT: SETGT_UINT T4.Z, T0.Z, literal.z, +; CM-NEXT: MUL_IEEE * T0.W, T0.Y, literal.w, +; CM-NEXT: 204(2.858649e-43), 102(1.429324e-43) +; CM-NEXT: -229(nan), 209715200(1.972152e-31) +; CM-NEXT: MUL_IEEE T6.X, PV.W, literal.x, +; CM-NEXT: MIN_INT T4.Y, T0.Z, literal.y, +; CM-NEXT: CNDE_INT T5.Z, PV.Z, PV.X, PV.Y, +; CM-NEXT: SETGT_INT * T2.W, T0.Z, literal.z, +; CM-NEXT: 209715200(1.972152e-31), 381(5.338947e-43) +; CM-NEXT: -127(nan), 0(0.000000e+00) +; CM-NEXT: CNDE_INT T5.X, PV.W, PV.Z, T0.Z, +; CM-NEXT: MIN_INT T1.Y, T1.Z, literal.x, +; CM-NEXT: ADD_INT T5.Z, PV.Y, literal.y, +; CM-NEXT: ADD_INT * T3.W, T0.Z, literal.z, BS:VEC_120/SCL_212 +; CM-NEXT: 381(5.338947e-43), -254(nan) +; CM-NEXT: -127(nan), 0(0.000000e+00) +; CM-NEXT: CNDE_INT T7.X, T1.W, PV.W, PV.Z, +; CM-NEXT: SETGT_INT T4.Y, T0.Z, literal.x, +; CM-NEXT: ADD_INT T0.Z, PV.Y, literal.y, +; CM-NEXT: ADD_INT * T1.W, T1.Z, literal.z, BS:VEC_120/SCL_212 +; CM-NEXT: 127(1.779649e-43), -254(nan) +; CM-NEXT: -127(nan), 0(0.000000e+00) +; CM-NEXT: CNDE_INT T8.X, T2.Z, PV.W, PV.Z, +; CM-NEXT: SETGT_INT T1.Y, T1.Z, literal.x, BS:VEC_120/SCL_212 +; CM-NEXT: CNDE_INT T0.Z, PV.Y, T5.X, PV.X, +; CM-NEXT: CNDE_INT * T0.W, T4.Z, T6.X, T0.W, BS:VEC_201 +; CM-NEXT: 
127(1.779649e-43), 0(0.000000e+00) +; CM-NEXT: CNDE_INT T5.X, T2.W, PV.W, T0.Y, +; CM-NEXT: LSHL T0.Y, PV.Z, literal.x, +; CM-NEXT: CNDE_INT T0.Z, PV.Y, T3.Z, PV.X, +; CM-NEXT: CNDE_INT * T0.W, T3.X, T3.Y, T2.Y, BS:VEC_201 +; CM-NEXT: 23(3.222986e-44), 0(0.000000e+00) +; CM-NEXT: CNDE_INT T0.X, T1.X, PV.W, T0.X, +; CM-NEXT: LSHL T2.Y, PV.Z, literal.x, +; CM-NEXT: ADD_INT * T0.Z, PV.Y, literal.y, +; CM-NEXT: 23(3.222986e-44), 1065353216(1.000000e+00) +; CM-NEXT: ALU clause starting at 105: +; CM-NEXT: CNDE_INT * T0.W, T4.Y, T5.X, T2.X, +; CM-NEXT: MUL_IEEE T1.X, PV.W, T0.Z, +; CM-NEXT: SETGT T0.Y, literal.x, KC0[3].X, +; CM-NEXT: ADD_INT T0.Z, T2.Y, literal.y, +; CM-NEXT: CNDE_INT * T0.W, T1.Y, T0.X, T4.X, BS:VEC_120/SCL_212 +; CM-NEXT: -1026650416(-1.032789e+02), 1065353216(1.000000e+00) +; CM-NEXT: MUL_IEEE T0.X, PV.W, PV.Z, +; CM-NEXT: SETGT T1.Y, literal.x, KC0[2].W, +; CM-NEXT: CNDE T0.Z, PV.Y, PV.X, 0.0, +; CM-NEXT: SETGT * T0.W, KC0[3].X, literal.y, +; CM-NEXT: -1026650416(-1.032789e+02), 1118925336(8.872284e+01) +; CM-NEXT: CNDE T0.Y, PV.W, PV.Z, literal.x, +; CM-NEXT: CNDE T0.Z, PV.Y, PV.X, 0.0, +; CM-NEXT: SETGT * T0.W, KC0[2].W, literal.y, +; CM-NEXT: 2139095040(INF), 1118925336(8.872284e+01) +; CM-NEXT: CNDE * T0.X, PV.W, PV.Z, literal.x, +; CM-NEXT: 2139095040(INF), 0(0.000000e+00) +; CM-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, ; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00) %result = call <2 x float> @llvm.exp.v2f32(<2 x float> %in) store <2 x float> %result, ptr addrspace(1) %out @@ -424,310 +868,749 @@ ; VI-SDAG-LABEL: s_exp_v3f32: ; VI-SDAG: ; %bb.0: ; VI-SDAG-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34 -; VI-SDAG-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 -; VI-SDAG-NEXT: v_mov_b32_e32 v0, 0x3fb8aa3b +; VI-SDAG-NEXT: v_mov_b32_e32 v0, 0x3fb8a000 +; VI-SDAG-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 ; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0) -; VI-SDAG-NEXT: s_mov_b32 s7, 0xc2fc0000 -; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x42800000 -; VI-SDAG-NEXT: v_mul_f32_e32 v2, 
s6, v0 -; VI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s7, v2 -; VI-SDAG-NEXT: v_cndmask_b32_e32 v3, 0, v1, vcc -; VI-SDAG-NEXT: v_add_f32_e32 v2, v2, v3 -; VI-SDAG-NEXT: v_exp_f32_e32 v2, v2 -; VI-SDAG-NEXT: v_mov_b32_e32 v3, 0x1f800000 -; VI-SDAG-NEXT: v_cndmask_b32_e32 v4, 1.0, v3, vcc -; VI-SDAG-NEXT: v_mul_f32_e32 v2, v2, v4 -; VI-SDAG-NEXT: v_mul_f32_e32 v4, s5, v0 -; VI-SDAG-NEXT: v_mul_f32_e32 v0, s4, v0 -; VI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s7, v4 -; VI-SDAG-NEXT: v_cmp_gt_f32_e64 s[0:1], s7, v0 -; VI-SDAG-NEXT: v_cndmask_b32_e32 v5, 0, v1, vcc -; VI-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, v1, s[0:1] -; VI-SDAG-NEXT: v_add_f32_e32 v4, v4, v5 -; VI-SDAG-NEXT: v_add_f32_e32 v0, v0, v1 -; VI-SDAG-NEXT: v_exp_f32_e32 v4, v4 +; VI-SDAG-NEXT: s_and_b32 s2, s6, 0xfffff000 +; VI-SDAG-NEXT: v_mov_b32_e32 v2, s2 +; VI-SDAG-NEXT: v_sub_f32_e32 v2, s6, v2 +; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x39a3b295, v2 +; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v2 +; VI-SDAG-NEXT: v_mul_f32_e32 v1, s2, v0 +; VI-SDAG-NEXT: v_add_f32_e32 v2, v2, v4 +; VI-SDAG-NEXT: v_mov_b32_e32 v4, 0x39a3b295 +; VI-SDAG-NEXT: v_rndne_f32_e32 v3, v1 +; VI-SDAG-NEXT: v_mul_f32_e32 v5, s2, v4 +; VI-SDAG-NEXT: v_sub_f32_e32 v1, v1, v3 +; VI-SDAG-NEXT: v_add_f32_e32 v2, v5, v2 +; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 +; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1 +; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v3 +; VI-SDAG-NEXT: s_and_b32 s2, s5, 0xfffff000 +; VI-SDAG-NEXT: v_mov_b32_e32 v7, s2 +; VI-SDAG-NEXT: v_sub_f32_e32 v7, s5, v7 +; VI-SDAG-NEXT: v_ldexp_f32 v1, v1, v2 +; VI-SDAG-NEXT: v_mul_f32_e32 v2, s2, v0 +; VI-SDAG-NEXT: v_mul_f32_e32 v8, 0x39a3b295, v7 +; VI-SDAG-NEXT: v_mul_f32_e32 v7, 0x3fb8a000, v7 +; VI-SDAG-NEXT: v_rndne_f32_e32 v6, v2 +; VI-SDAG-NEXT: v_add_f32_e32 v7, v7, v8 +; VI-SDAG-NEXT: v_mul_f32_e32 v8, s2, v4 +; VI-SDAG-NEXT: v_sub_f32_e32 v2, v2, v6 +; VI-SDAG-NEXT: v_add_f32_e32 v7, v8, v7 +; VI-SDAG-NEXT: v_add_f32_e32 v2, v2, v7 +; VI-SDAG-NEXT: v_exp_f32_e32 v7, v2 +; VI-SDAG-NEXT: 
v_cvt_i32_f32_e32 v6, v6 +; VI-SDAG-NEXT: v_mov_b32_e32 v3, 0xc2ce8ed0 +; VI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s6, v3 +; VI-SDAG-NEXT: v_mov_b32_e32 v5, 0x42b17218 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; VI-SDAG-NEXT: v_mov_b32_e32 v8, 0x7f800000 +; VI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s6, v5 +; VI-SDAG-NEXT: s_and_b32 s2, s4, 0xfffff000 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v2, v8, v1, vcc +; VI-SDAG-NEXT: v_ldexp_f32 v1, v7, v6 +; VI-SDAG-NEXT: v_mov_b32_e32 v7, s2 +; VI-SDAG-NEXT: v_sub_f32_e32 v7, s4, v7 +; VI-SDAG-NEXT: v_mul_f32_e32 v0, s2, v0 +; VI-SDAG-NEXT: v_mul_f32_e32 v9, 0x39a3b295, v7 +; VI-SDAG-NEXT: v_mul_f32_e32 v7, 0x3fb8a000, v7 +; VI-SDAG-NEXT: v_rndne_f32_e32 v6, v0 +; VI-SDAG-NEXT: v_add_f32_e32 v7, v7, v9 +; VI-SDAG-NEXT: v_mul_f32_e32 v4, s2, v4 +; VI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v6 +; VI-SDAG-NEXT: v_add_f32_e32 v4, v4, v7 +; VI-SDAG-NEXT: v_add_f32_e32 v0, v0, v4 ; VI-SDAG-NEXT: v_exp_f32_e32 v0, v0 -; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v3, vcc -; VI-SDAG-NEXT: v_cndmask_b32_e64 v3, 1.0, v3, s[0:1] -; VI-SDAG-NEXT: v_mul_f32_e32 v1, v4, v1 -; VI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v3 -; VI-SDAG-NEXT: v_mov_b32_e32 v4, s3 -; VI-SDAG-NEXT: v_mov_b32_e32 v3, s2 +; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v4, v6 +; VI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s5, v3 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; VI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s5, v5 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, v8, v1, vcc +; VI-SDAG-NEXT: v_ldexp_f32 v0, v0, v4 +; VI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v3 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc +; VI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v5 +; VI-SDAG-NEXT: v_mov_b32_e32 v4, s1 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc +; VI-SDAG-NEXT: v_mov_b32_e32 v3, s0 ; VI-SDAG-NEXT: flat_store_dwordx3 v[3:4], v[0:2] ; VI-SDAG-NEXT: s_endpgm ; ; VI-GISEL-LABEL: s_exp_v3f32: ; VI-GISEL: ; %bb.0: ; VI-GISEL-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34 -; VI-GISEL-NEXT: s_load_dwordx2 s[2:3], s[0:1], 
0x24 -; VI-GISEL-NEXT: s_mov_b32 s0, 0xc2fc0000 -; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000 -; VI-GISEL-NEXT: v_mov_b32_e32 v4, 0x1f800000 +; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x3fb8a000 +; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x39a3b295 +; VI-GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 ; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0) -; VI-GISEL-NEXT: v_mov_b32_e32 v0, s4 -; VI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 -; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s0, v0 -; VI-GISEL-NEXT: v_cndmask_b32_e32 v3, 0, v2, vcc -; VI-GISEL-NEXT: v_add_f32_e32 v0, v0, v3 +; VI-GISEL-NEXT: s_and_b32 s2, s4, 0xfffff000 +; VI-GISEL-NEXT: v_mov_b32_e32 v0, s2 +; VI-GISEL-NEXT: v_sub_f32_e32 v0, s4, v0 +; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x39a3b295, v0 +; VI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8a000, v0 +; VI-GISEL-NEXT: v_mul_f32_e32 v3, s2, v1 +; VI-GISEL-NEXT: v_add_f32_e32 v0, v0, v4 +; VI-GISEL-NEXT: v_mul_f32_e32 v4, s2, v2 +; VI-GISEL-NEXT: s_and_b32 s2, s5, 0xfffff000 +; VI-GISEL-NEXT: v_mov_b32_e32 v5, s2 +; VI-GISEL-NEXT: v_sub_f32_e32 v5, s5, v5 +; VI-GISEL-NEXT: v_mul_f32_e32 v7, 0x39a3b295, v5 +; VI-GISEL-NEXT: v_mul_f32_e32 v5, 0x3fb8a000, v5 +; VI-GISEL-NEXT: v_mul_f32_e32 v6, s2, v1 +; VI-GISEL-NEXT: v_add_f32_e32 v5, v5, v7 +; VI-GISEL-NEXT: v_mul_f32_e32 v7, s2, v2 +; VI-GISEL-NEXT: v_add_f32_e32 v5, v7, v5 +; VI-GISEL-NEXT: v_rndne_f32_e32 v7, v6 +; VI-GISEL-NEXT: v_sub_f32_e32 v6, v6, v7 +; VI-GISEL-NEXT: v_add_f32_e32 v5, v6, v5 +; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v6, v7 +; VI-GISEL-NEXT: v_exp_f32_e32 v5, v5 +; VI-GISEL-NEXT: v_add_f32_e32 v0, v4, v0 +; VI-GISEL-NEXT: v_rndne_f32_e32 v4, v3 +; VI-GISEL-NEXT: s_and_b32 s2, s6, 0xfffff000 +; VI-GISEL-NEXT: v_sub_f32_e32 v3, v3, v4 +; VI-GISEL-NEXT: v_ldexp_f32 v5, v5, v6 +; VI-GISEL-NEXT: v_mov_b32_e32 v6, s2 +; VI-GISEL-NEXT: v_add_f32_e32 v0, v3, v0 +; VI-GISEL-NEXT: v_sub_f32_e32 v6, s6, v6 +; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v3, v4 ; VI-GISEL-NEXT: v_exp_f32_e32 v0, v0 -; VI-GISEL-NEXT: v_cndmask_b32_e32 v5, 1.0, 
v4, vcc -; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x3fb8aa3b -; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0xc2fc0000 -; VI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v5 -; VI-GISEL-NEXT: v_mov_b32_e32 v5, s5 -; VI-GISEL-NEXT: v_mul_f32_e32 v5, 0x3fb8aa3b, v5 -; VI-GISEL-NEXT: v_mul_f32_e32 v1, s6, v1 -; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s0, v5 -; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], v1, v3 -; VI-GISEL-NEXT: v_cndmask_b32_e32 v6, 0, v2, vcc -; VI-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, v2, s[0:1] +; VI-GISEL-NEXT: v_mul_f32_e32 v8, 0x39a3b295, v6 +; VI-GISEL-NEXT: v_mul_f32_e32 v6, 0x3fb8a000, v6 +; VI-GISEL-NEXT: v_mul_f32_e32 v1, s2, v1 +; VI-GISEL-NEXT: v_add_f32_e32 v6, v6, v8 +; VI-GISEL-NEXT: v_mul_f32_e32 v2, s2, v2 +; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v6 +; VI-GISEL-NEXT: v_rndne_f32_e32 v6, v1 +; VI-GISEL-NEXT: v_sub_f32_e32 v1, v1, v6 +; VI-GISEL-NEXT: v_ldexp_f32 v0, v0, v3 +; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0xc2ce8ed0 ; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 -; VI-GISEL-NEXT: v_add_f32_e32 v5, v5, v6 -; VI-GISEL-NEXT: v_exp_f32_e32 v2, v1 -; VI-GISEL-NEXT: v_exp_f32_e32 v5, v5 -; VI-GISEL-NEXT: v_cndmask_b32_e64 v3, 1.0, v4, s[0:1] -; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v4, vcc -; VI-GISEL-NEXT: v_mul_f32_e32 v2, v2, v3 -; VI-GISEL-NEXT: v_mov_b32_e32 v4, s3 -; VI-GISEL-NEXT: v_mul_f32_e32 v1, v5, v1 -; VI-GISEL-NEXT: v_mov_b32_e32 v3, s2 +; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s4, v3 +; VI-GISEL-NEXT: v_mov_b32_e32 v4, 0x42b17218 +; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v6 +; VI-GISEL-NEXT: v_exp_f32_e32 v6, v1 +; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc +; VI-GISEL-NEXT: v_mov_b32_e32 v7, 0x7f800000 +; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s4, v4 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v7, vcc +; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s5, v3 +; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v5, 0, vcc +; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s5, v4 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc +; VI-GISEL-NEXT: v_ldexp_f32 v2, v6, v2 +; VI-GISEL-NEXT: 
v_cmp_lt_f32_e32 vcc, s6, v3 +; VI-GISEL-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc +; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s6, v4 +; VI-GISEL-NEXT: v_mov_b32_e32 v4, s1 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v2, v2, v7, vcc +; VI-GISEL-NEXT: v_mov_b32_e32 v3, s0 ; VI-GISEL-NEXT: flat_store_dwordx3 v[3:4], v[0:2] ; VI-GISEL-NEXT: s_endpgm ; ; GFX900-SDAG-LABEL: s_exp_v3f32: ; GFX900-SDAG: ; %bb.0: ; GFX900-SDAG-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34 -; GFX900-SDAG-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 ; GFX900-SDAG-NEXT: v_mov_b32_e32 v0, 0x3fb8aa3b -; GFX900-SDAG-NEXT: s_mov_b32 s0, 0xc2fc0000 -; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x42800000 +; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x32a5705f +; GFX900-SDAG-NEXT: v_mov_b32_e32 v5, 0x42b17218 +; GFX900-SDAG-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 ; GFX900-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; GFX900-SDAG-NEXT: v_mul_f32_e32 v6, s5, v0 +; GFX900-SDAG-NEXT: v_rndne_f32_e32 v7, v6 +; GFX900-SDAG-NEXT: v_sub_f32_e32 v8, v6, v7 +; GFX900-SDAG-NEXT: v_fma_f32 v6, s5, v0, -v6 +; GFX900-SDAG-NEXT: v_fma_f32 v6, s5, v1, v6 ; GFX900-SDAG-NEXT: v_mul_f32_e32 v2, s6, v0 -; GFX900-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s0, v2 -; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v3, 0, v1, vcc -; GFX900-SDAG-NEXT: v_add_f32_e32 v2, v2, v3 +; GFX900-SDAG-NEXT: v_add_f32_e32 v6, v8, v6 +; GFX900-SDAG-NEXT: v_rndne_f32_e32 v3, v2 +; GFX900-SDAG-NEXT: v_fma_f32 v4, s6, v0, -v2 +; GFX900-SDAG-NEXT: v_exp_f32_e32 v6, v6 +; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v7, v7 +; GFX900-SDAG-NEXT: v_sub_f32_e32 v2, v2, v3 +; GFX900-SDAG-NEXT: v_fma_f32 v4, s6, v1, v4 +; GFX900-SDAG-NEXT: v_add_f32_e32 v2, v2, v4 +; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v3, v3 ; GFX900-SDAG-NEXT: v_exp_f32_e32 v2, v2 -; GFX900-SDAG-NEXT: v_mov_b32_e32 v4, 0x1f800000 -; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v5, 1.0, v4, vcc -; GFX900-SDAG-NEXT: v_mov_b32_e32 v3, 0 -; GFX900-SDAG-NEXT: v_mul_f32_e32 v2, v2, v5 -; GFX900-SDAG-NEXT: v_mul_f32_e32 v5, s5, v0 -; GFX900-SDAG-NEXT: v_mul_f32_e32 
v0, s4, v0 -; GFX900-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s0, v5 -; GFX900-SDAG-NEXT: v_cmp_gt_f32_e64 s[0:1], s0, v0 -; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v6, 0, v1, vcc -; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, v1, s[0:1] -; GFX900-SDAG-NEXT: v_add_f32_e32 v5, v5, v6 -; GFX900-SDAG-NEXT: v_add_f32_e32 v0, v0, v1 -; GFX900-SDAG-NEXT: v_exp_f32_e32 v5, v5 +; GFX900-SDAG-NEXT: v_ldexp_f32 v6, v6, v7 +; GFX900-SDAG-NEXT: v_mul_f32_e32 v7, s4, v0 +; GFX900-SDAG-NEXT: v_rndne_f32_e32 v9, v7 +; GFX900-SDAG-NEXT: v_fma_f32 v0, s4, v0, -v7 +; GFX900-SDAG-NEXT: v_sub_f32_e32 v10, v7, v9 +; GFX900-SDAG-NEXT: v_fma_f32 v0, s4, v1, v0 +; GFX900-SDAG-NEXT: v_ldexp_f32 v2, v2, v3 +; GFX900-SDAG-NEXT: v_mov_b32_e32 v3, 0xc2ce8ed0 +; GFX900-SDAG-NEXT: v_add_f32_e32 v0, v10, v0 +; GFX900-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s6, v3 ; GFX900-SDAG-NEXT: v_exp_f32_e32 v0, v0 -; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v4, vcc -; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v4, 1.0, v4, s[0:1] -; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, v5, v1 -; GFX900-SDAG-NEXT: v_mul_f32_e32 v0, v0, v4 -; GFX900-SDAG-NEXT: global_store_dwordx3 v3, v[0:2], s[2:3] +; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v7, v9 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc +; GFX900-SDAG-NEXT: v_mov_b32_e32 v8, 0x7f800000 +; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s6, v5 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v2, v8, v2, vcc +; GFX900-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s5, v3 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v6, vcc +; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s5, v5 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, v8, v1, vcc +; GFX900-SDAG-NEXT: v_ldexp_f32 v0, v0, v7 +; GFX900-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v3 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc +; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v5 +; GFX900-SDAG-NEXT: v_mov_b32_e32 v4, 0 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc +; GFX900-SDAG-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] ; GFX900-SDAG-NEXT: s_endpgm ; ; 
GFX900-GISEL-LABEL: s_exp_v3f32: ; GFX900-GISEL: ; %bb.0: ; GFX900-GISEL-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34 -; GFX900-GISEL-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 -; GFX900-GISEL-NEXT: s_mov_b32 s0, 0xc2fc0000 -; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000 -; GFX900-GISEL-NEXT: v_mov_b32_e32 v4, 0x1f800000 +; GFX900-GISEL-NEXT: s_mov_b32 s2, 0x3fb8aa3b +; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x32a5705f +; GFX900-GISEL-NEXT: v_mov_b32_e32 v4, 0x42b17218 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v5, 0x7f800000 ; GFX900-GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX900-GISEL-NEXT: v_mov_b32_e32 v0, s4 -; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 -; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s0, v0 -; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v3, 0, v2, vcc -; GFX900-GISEL-NEXT: v_add_f32_e32 v0, v0, v3 +; GFX900-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8aa3b, v0 +; GFX900-GISEL-NEXT: v_fma_f32 v0, v0, s2, -v2 +; GFX900-GISEL-NEXT: v_rndne_f32_e32 v3, v2 +; GFX900-GISEL-NEXT: v_fma_f32 v0, s4, v1, v0 +; GFX900-GISEL-NEXT: v_sub_f32_e32 v2, v2, v3 +; GFX900-GISEL-NEXT: v_add_f32_e32 v0, v2, v0 +; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x3fb8aa3b +; GFX900-GISEL-NEXT: v_mul_f32_e32 v6, s5, v3 +; GFX900-GISEL-NEXT: v_fma_f32 v7, s5, v3, -v6 +; GFX900-GISEL-NEXT: v_rndne_f32_e32 v8, v6 +; GFX900-GISEL-NEXT: v_fma_f32 v7, s5, v1, v7 +; GFX900-GISEL-NEXT: v_sub_f32_e32 v6, v6, v8 +; GFX900-GISEL-NEXT: v_add_f32_e32 v6, v6, v7 +; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v7, v8 +; GFX900-GISEL-NEXT: v_exp_f32_e32 v6, v6 ; GFX900-GISEL-NEXT: v_exp_f32_e32 v0, v0 -; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v5, 1.0, v4, vcc -; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x3fb8aa3b -; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0xc2fc0000 -; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, v0, v5 -; GFX900-GISEL-NEXT: v_mov_b32_e32 v5, s5 -; GFX900-GISEL-NEXT: v_mul_f32_e32 v5, 0x3fb8aa3b, v5 -; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, s6, v1 -; GFX900-GISEL-NEXT: 
v_cmp_gt_f32_e32 vcc, s0, v5 -; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], v1, v3 -; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v6, 0, v2, vcc -; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, v2, s[0:1] -; GFX900-GISEL-NEXT: v_add_f32_e32 v5, v5, v6 -; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 -; GFX900-GISEL-NEXT: v_exp_f32_e32 v5, v5 -; GFX900-GISEL-NEXT: v_exp_f32_e32 v2, v1 -; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v4, vcc -; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v3, 1.0, v4, s[0:1] -; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, v5, v1 -; GFX900-GISEL-NEXT: v_mul_f32_e32 v2, v2, v3 +; GFX900-GISEL-NEXT: s_mov_b32 s2, 0xc2ce8ed0 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v8, s5 +; GFX900-GISEL-NEXT: v_ldexp_f32 v6, v6, v7 +; GFX900-GISEL-NEXT: v_mul_f32_e32 v7, s6, v3 +; GFX900-GISEL-NEXT: v_fma_f32 v3, s6, v3, -v7 +; GFX900-GISEL-NEXT: v_fma_f32 v1, s6, v1, v3 +; GFX900-GISEL-NEXT: v_rndne_f32_e32 v3, v7 +; GFX900-GISEL-NEXT: v_sub_f32_e32 v7, v7, v3 +; GFX900-GISEL-NEXT: v_ldexp_f32 v0, v0, v2 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 +; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v7, v1 +; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s4, v2 +; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v3, v3 +; GFX900-GISEL-NEXT: v_exp_f32_e32 v7, v1 +; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc +; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s4, v4 +; GFX900-GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GFX900-GISEL-NEXT: s_mov_b32 s3, 0x42b17218 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc +; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s2, v8 +; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v6, 0, vcc +; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s3, v8 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc +; GFX900-GISEL-NEXT: v_ldexp_f32 v3, v7, v3 +; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s6, v2 +; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v2, v3, 0, vcc +; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s6, v4 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v2, v2, v5, vcc ; GFX900-GISEL-NEXT: 
v_mov_b32_e32 v3, 0 -; GFX900-GISEL-NEXT: global_store_dwordx3 v3, v[0:2], s[2:3] +; GFX900-GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX900-GISEL-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] ; GFX900-GISEL-NEXT: s_endpgm ; ; SI-SDAG-LABEL: s_exp_v3f32: ; SI-SDAG: ; %bb.0: ; SI-SDAG-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0xd -; SI-SDAG-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0x9 ; SI-SDAG-NEXT: v_mov_b32_e32 v0, 0x3fb8aa3b -; SI-SDAG-NEXT: s_mov_b32 s0, 0xc2fc0000 -; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x42800000 +; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x32a5705f +; SI-SDAG-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SI-SDAG-NEXT: s_mov_b32 s3, 0xf000 ; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; SI-SDAG-NEXT: v_mul_f32_e32 v5, s4, v0 +; SI-SDAG-NEXT: v_rndne_f32_e32 v6, v5 +; SI-SDAG-NEXT: v_sub_f32_e32 v7, v5, v6 +; SI-SDAG-NEXT: v_fma_f32 v5, s4, v0, -v5 +; SI-SDAG-NEXT: v_fma_f32 v5, s4, v2, v5 ; SI-SDAG-NEXT: v_mul_f32_e32 v1, s5, v0 -; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s0, v1 -; SI-SDAG-NEXT: v_cndmask_b32_e32 v3, 0, v2, vcc -; SI-SDAG-NEXT: v_add_f32_e32 v1, v1, v3 +; SI-SDAG-NEXT: v_add_f32_e32 v5, v7, v5 +; SI-SDAG-NEXT: v_rndne_f32_e32 v3, v1 +; SI-SDAG-NEXT: v_fma_f32 v4, s5, v0, -v1 +; SI-SDAG-NEXT: v_exp_f32_e32 v5, v5 +; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v6, v6 +; SI-SDAG-NEXT: v_sub_f32_e32 v1, v1, v3 +; SI-SDAG-NEXT: v_fma_f32 v4, s5, v2, v4 +; SI-SDAG-NEXT: v_add_f32_e32 v1, v1, v4 ; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1 -; SI-SDAG-NEXT: v_mov_b32_e32 v3, 0x1f800000 -; SI-SDAG-NEXT: v_cndmask_b32_e32 v4, 1.0, v3, vcc -; SI-SDAG-NEXT: s_mov_b32 s11, 0xf000 -; SI-SDAG-NEXT: v_mul_f32_e32 v1, v1, v4 -; SI-SDAG-NEXT: v_mul_f32_e32 v4, s4, v0 -; SI-SDAG-NEXT: v_mul_f32_e32 v0, s6, v0 -; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s0, v4 -; SI-SDAG-NEXT: v_cmp_gt_f32_e64 s[0:1], s0, v0 -; SI-SDAG-NEXT: v_cndmask_b32_e32 v5, 0, v2, vcc -; SI-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, v2, s[0:1] -; SI-SDAG-NEXT: v_add_f32_e32 v0, v0, v2 -; SI-SDAG-NEXT: v_add_f32_e32 v4, v4, v5 +; SI-SDAG-NEXT: 
v_cvt_i32_f32_e32 v3, v3 +; SI-SDAG-NEXT: v_ldexp_f32_e32 v5, v5, v6 +; SI-SDAG-NEXT: v_mul_f32_e32 v6, s6, v0 +; SI-SDAG-NEXT: v_rndne_f32_e32 v8, v6 +; SI-SDAG-NEXT: v_fma_f32 v0, s6, v0, -v6 +; SI-SDAG-NEXT: v_sub_f32_e32 v9, v6, v8 +; SI-SDAG-NEXT: v_fma_f32 v0, s6, v2, v0 +; SI-SDAG-NEXT: v_ldexp_f32_e32 v1, v1, v3 +; SI-SDAG-NEXT: v_mov_b32_e32 v3, 0xc2ce8ed0 +; SI-SDAG-NEXT: v_add_f32_e32 v0, v9, v0 +; SI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s5, v3 +; SI-SDAG-NEXT: v_mov_b32_e32 v4, 0x42b17218 ; SI-SDAG-NEXT: v_exp_f32_e32 v2, v0 -; SI-SDAG-NEXT: v_exp_f32_e32 v4, v4 -; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, 1.0, v3, vcc -; SI-SDAG-NEXT: v_cndmask_b32_e64 v3, 1.0, v3, s[0:1] -; SI-SDAG-NEXT: s_mov_b32 s10, -1 -; SI-SDAG-NEXT: v_mul_f32_e32 v2, v2, v3 -; SI-SDAG-NEXT: v_mul_f32_e32 v0, v4, v0 -; SI-SDAG-NEXT: buffer_store_dword v2, off, s[8:11], 0 offset:8 -; SI-SDAG-NEXT: buffer_store_dwordx2 v[0:1], off, s[8:11], 0 +; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v6, v8 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; SI-SDAG-NEXT: v_mov_b32_e32 v7, 0x7f800000 +; SI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s5, v4 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, v7, v1, vcc +; SI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v3 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v5, vcc +; SI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v4 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v7, v0, vcc +; SI-SDAG-NEXT: v_ldexp_f32_e32 v2, v2, v6 +; SI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s6, v3 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc +; SI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s6, v4 +; SI-SDAG-NEXT: s_mov_b32 s2, -1 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v2, v7, v2, vcc +; SI-SDAG-NEXT: buffer_store_dword v2, off, s[0:3], 0 offset:8 +; SI-SDAG-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 ; SI-SDAG-NEXT: s_endpgm ; ; SI-GISEL-LABEL: s_exp_v3f32: ; SI-GISEL: ; %bb.0: ; SI-GISEL-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0xd -; SI-GISEL-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0x9 -; SI-GISEL-NEXT: s_mov_b32 s0, 0xc2fc0000 -; 
SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000 -; SI-GISEL-NEXT: v_mov_b32_e32 v4, 0x1f800000 +; SI-GISEL-NEXT: s_mov_b32 s2, 0x3fb8aa3b +; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x32a5705f +; SI-GISEL-NEXT: v_mov_b32_e32 v4, 0x42b17218 +; SI-GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 ; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0) ; SI-GISEL-NEXT: v_mov_b32_e32 v0, s4 -; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 -; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s0, v0 -; SI-GISEL-NEXT: v_cndmask_b32_e32 v3, 0, v2, vcc -; SI-GISEL-NEXT: v_add_f32_e32 v0, v0, v3 +; SI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8aa3b, v0 +; SI-GISEL-NEXT: v_fma_f32 v0, v0, s2, -v2 +; SI-GISEL-NEXT: v_rndne_f32_e32 v3, v2 +; SI-GISEL-NEXT: v_fma_f32 v0, s4, v1, v0 +; SI-GISEL-NEXT: v_sub_f32_e32 v2, v2, v3 +; SI-GISEL-NEXT: v_add_f32_e32 v0, v2, v0 +; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3 +; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x3fb8aa3b +; SI-GISEL-NEXT: v_mul_f32_e32 v6, s5, v3 +; SI-GISEL-NEXT: v_fma_f32 v7, s5, v3, -v6 +; SI-GISEL-NEXT: v_rndne_f32_e32 v8, v6 +; SI-GISEL-NEXT: v_fma_f32 v7, s5, v1, v7 +; SI-GISEL-NEXT: v_sub_f32_e32 v6, v6, v8 +; SI-GISEL-NEXT: v_add_f32_e32 v6, v6, v7 +; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v7, v8 +; SI-GISEL-NEXT: v_exp_f32_e32 v6, v6 ; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0 -; SI-GISEL-NEXT: v_cndmask_b32_e32 v5, 1.0, v4, vcc -; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x3fb8aa3b -; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0xc2fc0000 -; SI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v5 -; SI-GISEL-NEXT: v_mov_b32_e32 v5, s5 -; SI-GISEL-NEXT: v_mul_f32_e32 v5, 0x3fb8aa3b, v5 -; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s0, v5 -; SI-GISEL-NEXT: v_mul_f32_e32 v1, s6, v1 -; SI-GISEL-NEXT: v_cndmask_b32_e32 v6, 0, v2, vcc -; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], v1, v3 -; SI-GISEL-NEXT: v_add_f32_e32 v5, v5, v6 -; SI-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, v2, s[0:1] -; SI-GISEL-NEXT: v_exp_f32_e32 v5, v5 -; SI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 -; SI-GISEL-NEXT: v_exp_f32_e32 v2, v1 -; SI-GISEL-NEXT: 
v_cndmask_b32_e32 v1, 1.0, v4, vcc -; SI-GISEL-NEXT: v_mul_f32_e32 v1, v5, v1 -; SI-GISEL-NEXT: v_cndmask_b32_e64 v3, 1.0, v4, s[0:1] -; SI-GISEL-NEXT: s_mov_b32 s10, -1 -; SI-GISEL-NEXT: s_mov_b32 s11, 0xf000 -; SI-GISEL-NEXT: v_mul_f32_e32 v2, v2, v3 -; SI-GISEL-NEXT: buffer_store_dwordx2 v[0:1], off, s[8:11], 0 -; SI-GISEL-NEXT: buffer_store_dword v2, off, s[8:11], 0 offset:8 +; SI-GISEL-NEXT: s_mov_b32 s2, 0xc2ce8ed0 +; SI-GISEL-NEXT: v_mov_b32_e32 v5, 0x7f800000 +; SI-GISEL-NEXT: v_ldexp_f32_e32 v6, v6, v7 +; SI-GISEL-NEXT: v_mul_f32_e32 v7, s6, v3 +; SI-GISEL-NEXT: v_fma_f32 v3, s6, v3, -v7 +; SI-GISEL-NEXT: v_fma_f32 v1, s6, v1, v3 +; SI-GISEL-NEXT: v_rndne_f32_e32 v3, v7 +; SI-GISEL-NEXT: v_sub_f32_e32 v7, v7, v3 +; SI-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v2 +; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 +; SI-GISEL-NEXT: v_add_f32_e32 v1, v7, v1 +; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s4, v2 +; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v3, v3 +; SI-GISEL-NEXT: v_exp_f32_e32 v7, v1 +; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc +; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s4, v4 +; SI-GISEL-NEXT: v_mov_b32_e32 v8, s5 +; SI-GISEL-NEXT: s_mov_b32 s3, 0x42b17218 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc +; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s2, v8 +; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v6, 0, vcc +; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s3, v8 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc +; SI-GISEL-NEXT: v_ldexp_f32_e32 v3, v7, v3 +; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s6, v2 +; SI-GISEL-NEXT: v_cndmask_b32_e64 v2, v3, 0, vcc +; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s6, v4 +; SI-GISEL-NEXT: s_mov_b32 s2, -1 +; SI-GISEL-NEXT: s_mov_b32 s3, 0xf000 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v2, v2, v5, vcc +; SI-GISEL-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 +; SI-GISEL-NEXT: buffer_store_dword v2, off, s[0:3], 0 offset:8 ; SI-GISEL-NEXT: s_endpgm ; ; R600-LABEL: s_exp_v3f32: ; R600: ; %bb.0: -; R600-NEXT: ALU 33, @4, KC0[CB0:0-32], KC1[] +; R600-NEXT: 
ALU 100, @6, KC0[CB0:0-32], KC1[] +; R600-NEXT: ALU 69, @107, KC0[CB0:0-32], KC1[] ; R600-NEXT: MEM_RAT_CACHELESS STORE_RAW T2.X, T3.X, 0 -; R600-NEXT: MEM_RAT_CACHELESS STORE_RAW T1.XY, T0.X, 1 +; R600-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1 ; R600-NEXT: CF_END -; R600-NEXT: ALU clause starting at 4: -; R600-NEXT: MUL_IEEE * T0.W, KC0[3].Z, literal.x, -; R600-NEXT: 1069066811(1.442695e+00), 0(0.000000e+00) -; R600-NEXT: SETGT T1.W, literal.x, PV.W, -; R600-NEXT: MUL_IEEE * T2.W, KC0[3].Y, literal.y, -; R600-NEXT: -1023672320(-1.260000e+02), 1069066811(1.442695e+00) -; R600-NEXT: SETGT T3.W, literal.x, PS, -; R600-NEXT: CNDE * T4.W, PV.W, 0.0, literal.y, -; R600-NEXT: -1023672320(-1.260000e+02), 1115684864(6.400000e+01) -; R600-NEXT: ADD T0.Z, T0.W, PS, -; R600-NEXT: MUL_IEEE T0.W, KC0[3].W, literal.x, -; R600-NEXT: CNDE * T4.W, PV.W, 0.0, literal.y, -; R600-NEXT: 1069066811(1.442695e+00), 1115684864(6.400000e+01) -; R600-NEXT: ADD T0.Y, T2.W, PS, -; R600-NEXT: SETGT T1.Z, literal.x, PV.W, -; R600-NEXT: CNDE T1.W, T1.W, 1.0, literal.y, BS:VEC_120/SCL_212 -; R600-NEXT: EXP_IEEE * T0.X, PV.Z, -; R600-NEXT: -1023672320(-1.260000e+02), 528482304(5.421011e-20) -; R600-NEXT: MUL_IEEE T1.Y, PS, PV.W, -; R600-NEXT: CNDE T0.Z, PV.Z, 0.0, literal.x, -; R600-NEXT: CNDE T1.W, T3.W, 1.0, literal.y, -; R600-NEXT: EXP_IEEE * T0.X, PV.Y, -; R600-NEXT: 1115684864(6.400000e+01), 528482304(5.421011e-20) -; R600-NEXT: MUL_IEEE T1.X, PS, PV.W, -; R600-NEXT: ADD T0.W, T0.W, PV.Z, -; R600-NEXT: LSHR * T0.X, KC0[2].Y, literal.x, -; R600-NEXT: 2(2.802597e-45), 0(0.000000e+00) -; R600-NEXT: CNDE T1.W, T1.Z, 1.0, literal.x, -; R600-NEXT: EXP_IEEE * T0.Y, PV.W, -; R600-NEXT: 528482304(5.421011e-20), 0(0.000000e+00) -; R600-NEXT: MUL_IEEE T2.X, PS, PV.W, -; R600-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, -; R600-NEXT: 8(1.121039e-44), 0(0.000000e+00) +; R600-NEXT: PAD +; R600-NEXT: ALU clause starting at 6: +; R600-NEXT: AND_INT * T0.W, KC0[3].Y, literal.x, +; R600-NEXT: -4096(nan), 
0(0.000000e+00) +; R600-NEXT: ADD T1.W, KC0[3].Y, -PV.W, +; R600-NEXT: MUL_IEEE * T2.W, PV.W, literal.x, +; R600-NEXT: 1069064192(1.442383e+00), 0(0.000000e+00) +; R600-NEXT: RNDNE T3.W, PS, +; R600-NEXT: MUL_IEEE * T4.W, PV.W, literal.x, +; R600-NEXT: 967029397(3.122284e-04), 0(0.000000e+00) +; R600-NEXT: MULADD_IEEE T1.W, T1.W, literal.x, PS, +; R600-NEXT: TRUNC * T4.W, PV.W, +; R600-NEXT: 1069064192(1.442383e+00), 0(0.000000e+00) +; R600-NEXT: FLT_TO_INT T0.Z, PS, +; R600-NEXT: MULADD_IEEE T0.W, T0.W, literal.x, PV.W, +; R600-NEXT: ADD * T1.W, T2.W, -T3.W, +; R600-NEXT: 967029397(3.122284e-04), 0(0.000000e+00) +; R600-NEXT: ADD T0.W, PS, PV.W, +; R600-NEXT: MAX_INT * T1.W, PV.Z, literal.x, +; R600-NEXT: -330(nan), 0(0.000000e+00) +; R600-NEXT: ADD_INT T0.Y, PS, literal.x, +; R600-NEXT: ADD_INT T1.Z, T0.Z, literal.y, +; R600-NEXT: SETGT_UINT T1.W, T0.Z, literal.z, +; R600-NEXT: EXP_IEEE * T0.X, PV.W, +; R600-NEXT: 204(2.858649e-43), 102(1.429324e-43) +; R600-NEXT: -229(nan), 0(0.000000e+00) +; R600-NEXT: CNDE_INT T1.Z, PV.W, PV.Y, PV.Z, +; R600-NEXT: SETGT_INT T0.W, T0.Z, literal.x, +; R600-NEXT: MUL_IEEE * T2.W, PS, literal.y, +; R600-NEXT: -127(nan), 209715200(1.972152e-31) +; R600-NEXT: MUL_IEEE T0.Y, PS, literal.x, +; R600-NEXT: CNDE_INT T1.Z, PV.W, PV.Z, T0.Z, +; R600-NEXT: MIN_INT T3.W, T0.Z, literal.y, +; R600-NEXT: AND_INT * T4.W, KC0[3].W, literal.z, +; R600-NEXT: 209715200(1.972152e-31), 381(5.338947e-43) +; R600-NEXT: -4096(nan), 0(0.000000e+00) +; R600-NEXT: MUL_IEEE T1.X, T0.X, literal.x, +; R600-NEXT: ADD T1.Y, KC0[3].W, -PS, +; R600-NEXT: ADD_INT T2.Z, PV.W, literal.y, +; R600-NEXT: ADD_INT T3.W, T0.Z, literal.z, +; R600-NEXT: SETGT_UINT * T5.W, T0.Z, literal.w, +; R600-NEXT: 2130706432(1.701412e+38), -254(nan) +; R600-NEXT: -127(nan), 254(3.559298e-43) +; R600-NEXT: CNDE_INT T2.X, PS, PV.W, PV.Z, +; R600-NEXT: SETGT_INT T2.Y, T0.Z, literal.x, +; R600-NEXT: MUL_IEEE T0.Z, PV.Y, literal.y, +; R600-NEXT: MUL_IEEE T3.W, T4.W, literal.z, +; R600-NEXT: 
MUL_IEEE * T6.W, PV.X, literal.w, +; R600-NEXT: 127(1.779649e-43), 967029397(3.122284e-04) +; R600-NEXT: 1069064192(1.442383e+00), 2130706432(1.701412e+38) +; R600-NEXT: CNDE_INT T1.X, T5.W, T1.X, PS, BS:VEC_120/SCL_212 +; R600-NEXT: RNDNE T3.Y, PV.W, +; R600-NEXT: MULADD_IEEE T0.Z, T1.Y, literal.x, PV.Z, +; R600-NEXT: CNDE_INT T5.W, PV.Y, T1.Z, PV.X, +; R600-NEXT: CNDE_INT * T1.W, T1.W, T0.Y, T2.W, +; R600-NEXT: 1069064192(1.442383e+00), 0(0.000000e+00) +; R600-NEXT: CNDE_INT T0.X, T0.W, PS, T0.X, +; R600-NEXT: LSHL T0.Y, PV.W, literal.x, +; R600-NEXT: AND_INT T1.Z, KC0[3].Z, literal.y, +; R600-NEXT: MULADD_IEEE T0.W, T4.W, literal.z, PV.Z, BS:VEC_120/SCL_212 +; R600-NEXT: ADD * T1.W, T3.W, -PV.Y, +; R600-NEXT: 23(3.222986e-44), -4096(nan) +; R600-NEXT: 967029397(3.122284e-04), 0(0.000000e+00) +; R600-NEXT: ADD T1.Y, PS, PV.W, +; R600-NEXT: MUL_IEEE T0.Z, PV.Z, literal.x, +; R600-NEXT: ADD_INT T0.W, PV.Y, literal.y, +; R600-NEXT: CNDE_INT * T1.W, T2.Y, PV.X, T1.X, +; R600-NEXT: 1069064192(1.442383e+00), 1065353216(1.000000e+00) +; R600-NEXT: MUL_IEEE T0.X, PS, PV.W, +; R600-NEXT: ADD T0.Y, KC0[3].Z, -T1.Z, +; R600-NEXT: RNDNE T2.Z, PV.Z, +; R600-NEXT: TRUNC T0.W, T3.Y, +; R600-NEXT: EXP_IEEE * T1.X, PV.Y, +; R600-NEXT: SETGT T2.X, literal.x, KC0[3].Y, +; R600-NEXT: FLT_TO_INT T1.Y, PV.W, +; R600-NEXT: TRUNC T3.Z, PV.Z, +; R600-NEXT: MUL_IEEE T0.W, PV.Y, literal.y, +; R600-NEXT: MUL_IEEE * T1.W, PS, literal.z, +; R600-NEXT: -1026650416(-1.032789e+02), 967029397(3.122284e-04) +; R600-NEXT: 209715200(1.972152e-31), 0(0.000000e+00) +; R600-NEXT: MUL_IEEE T3.X, T1.X, literal.x, +; R600-NEXT: MUL_IEEE T2.Y, PS, literal.y, +; R600-NEXT: MULADD_IEEE T4.Z, T0.Y, literal.z, PV.W, +; R600-NEXT: FLT_TO_INT T0.W, PV.Z, +; R600-NEXT: MIN_INT * T2.W, PV.Y, literal.w, +; R600-NEXT: 2130706432(1.701412e+38), 209715200(1.972152e-31) +; R600-NEXT: 1069064192(1.442383e+00), 381(5.338947e-43) +; R600-NEXT: ADD_INT T4.X, PS, literal.x, +; R600-NEXT: MAX_INT T0.Y, PV.W, literal.y, +; 
R600-NEXT: MULADD_IEEE T1.Z, T1.Z, literal.z, PV.Z, +; R600-NEXT: ADD T2.W, T0.Z, -T2.Z, BS:VEC_120/SCL_212 +; R600-NEXT: MIN_INT * T3.W, PV.W, literal.w, +; R600-NEXT: -254(nan), -330(nan) +; R600-NEXT: 967029397(3.122284e-04), 381(5.338947e-43) +; R600-NEXT: ADD_INT T5.X, PS, literal.x, +; R600-NEXT: ADD T3.Y, PV.W, PV.Z, +; R600-NEXT: ADD_INT T0.Z, PV.Y, literal.y, +; R600-NEXT: ADD_INT T2.W, T0.W, literal.z, +; R600-NEXT: SETGT_UINT * T3.W, T0.W, literal.w, +; R600-NEXT: -254(nan), 204(2.858649e-43) +; R600-NEXT: 102(1.429324e-43), -229(nan) +; R600-NEXT: ADD_INT * T6.X, T0.W, literal.x, +; R600-NEXT: -127(nan), 0(0.000000e+00) +; R600-NEXT: ALU clause starting at 107: +; R600-NEXT: SETGT_UINT T0.Y, T0.W, literal.x, +; R600-NEXT: CNDE_INT T0.Z, T3.W, T0.Z, T2.W, BS:VEC_102/SCL_221 +; R600-NEXT: SETGT_INT T2.W, T0.W, literal.y, +; R600-NEXT: EXP_IEEE * T1.Z, T3.Y, +; R600-NEXT: 254(3.559298e-43), -127(nan) +; R600-NEXT: ADD_INT T7.X, T1.Y, literal.x, +; R600-NEXT: MUL_IEEE T3.Y, PS, literal.y, +; R600-NEXT: CNDE_INT T0.Z, PV.W, PV.Z, T0.W, +; R600-NEXT: CNDE_INT T4.W, PV.Y, T6.X, T5.X, +; R600-NEXT: SETGT_INT * T0.W, T0.W, literal.z, +; R600-NEXT: -127(nan), 209715200(1.972152e-31) +; R600-NEXT: 127(1.779649e-43), 0(0.000000e+00) +; R600-NEXT: SETGT_UINT T5.X, T1.Y, literal.x, +; R600-NEXT: CNDE_INT T4.Y, PS, PV.Z, PV.W, +; R600-NEXT: MAX_INT T0.Z, T1.Y, literal.y, +; R600-NEXT: MUL_IEEE T4.W, T1.Z, literal.z, +; R600-NEXT: MUL_IEEE * T5.W, PV.Y, literal.w, +; R600-NEXT: 254(3.559298e-43), -330(nan) +; R600-NEXT: 2130706432(1.701412e+38), 209715200(1.972152e-31) +; R600-NEXT: CNDE_INT T6.X, T3.W, PS, T3.Y, BS:VEC_021/SCL_122 +; R600-NEXT: MUL_IEEE T3.Y, PV.W, literal.x, +; R600-NEXT: ADD_INT T0.Z, PV.Z, literal.y, +; R600-NEXT: ADD_INT T3.W, T1.Y, literal.z, +; R600-NEXT: SETGT_UINT * T5.W, T1.Y, literal.w, +; R600-NEXT: 2130706432(1.701412e+38), 204(2.858649e-43) +; R600-NEXT: 102(1.429324e-43), -229(nan) +; R600-NEXT: CNDE_INT T8.X, PS, PV.Z, PV.W, +; 
R600-NEXT: SETGT_INT T5.Y, T1.Y, literal.x, +; R600-NEXT: CNDE_INT T0.Z, T0.Y, T4.W, PV.Y, BS:VEC_120/SCL_212 +; R600-NEXT: CNDE_INT T2.W, T2.W, PV.X, T1.Z, +; R600-NEXT: LSHL * T3.W, T4.Y, literal.y, +; R600-NEXT: -127(nan), 23(3.222986e-44) +; R600-NEXT: ADD_INT T6.X, PS, literal.x, +; R600-NEXT: CNDE_INT T0.Y, T0.W, PV.W, PV.Z, +; R600-NEXT: CNDE_INT T0.Z, PV.Y, PV.X, T1.Y, +; R600-NEXT: CNDE_INT T0.W, T5.X, T7.X, T4.X, +; R600-NEXT: SETGT_INT * T2.W, T1.Y, literal.y, +; R600-NEXT: 1065353216(1.000000e+00), 127(1.779649e-43) +; R600-NEXT: CNDE_INT T4.X, PS, PV.Z, PV.W, +; R600-NEXT: MUL_IEEE T0.Y, PV.Y, PV.X, +; R600-NEXT: SETGT T0.Z, literal.x, KC0[3].Z, +; R600-NEXT: CNDE_INT T0.W, T5.W, T2.Y, T1.W, +; R600-NEXT: MUL_IEEE * T1.W, T3.X, literal.y, +; R600-NEXT: -1026650416(-1.032789e+02), 2130706432(1.701412e+38) +; R600-NEXT: CNDE_INT T3.X, T5.X, T3.X, PS, +; R600-NEXT: CNDE_INT T1.Y, T5.Y, PV.W, T1.X, +; R600-NEXT: CNDE T0.Z, PV.Z, PV.Y, 0.0, +; R600-NEXT: SETGT T0.W, KC0[3].Z, literal.x, +; R600-NEXT: LSHL * T1.W, PV.X, literal.y, +; R600-NEXT: 1118925336(8.872284e+01), 23(3.222986e-44) +; R600-NEXT: ADD_INT T1.X, PS, literal.x, +; R600-NEXT: CNDE T0.Y, PV.W, PV.Z, literal.y, +; R600-NEXT: CNDE_INT T0.Z, T2.W, PV.Y, PV.X, +; R600-NEXT: CNDE T0.W, T2.X, T0.X, 0.0, +; R600-NEXT: SETGT * T1.W, KC0[3].Y, literal.z, +; R600-NEXT: 1065353216(1.000000e+00), 2139095040(INF) +; R600-NEXT: 1118925336(8.872284e+01), 0(0.000000e+00) +; R600-NEXT: CNDE T0.X, PS, PV.W, literal.x, +; R600-NEXT: MUL_IEEE T0.W, PV.Z, PV.X, +; R600-NEXT: SETGT * T1.W, literal.y, KC0[3].W, +; R600-NEXT: 2139095040(INF), -1026650416(-1.032789e+02) +; R600-NEXT: LSHR T1.X, KC0[2].Y, literal.x, +; R600-NEXT: CNDE T0.W, PS, PV.W, 0.0, +; R600-NEXT: SETGT * T1.W, KC0[3].W, literal.y, +; R600-NEXT: 2(2.802597e-45), 1118925336(8.872284e+01) +; R600-NEXT: CNDE T2.X, PS, PV.W, literal.x, +; R600-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, +; R600-NEXT: 2139095040(INF), 8(1.121039e-44) ; R600-NEXT: LSHR 
* T3.X, PV.W, literal.x, ; R600-NEXT: 2(2.802597e-45), 0(0.000000e+00) ; ; CM-LABEL: s_exp_v3f32: ; CM: ; %bb.0: -; CM-NEXT: ALU 39, @4, KC0[CB0:0-32], KC1[] -; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T3, T0.X -; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T1.X, T2.X +; CM-NEXT: ALU 102, @6, KC0[CB0:0-32], KC1[] +; CM-NEXT: ALU 80, @109, KC0[CB0:0-32], KC1[] +; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T1, T3.X +; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T2.X, T0.X ; CM-NEXT: CF_END -; CM-NEXT: ALU clause starting at 4: -; CM-NEXT: MUL_IEEE * T0.W, KC0[3].W, literal.x, -; CM-NEXT: 1069066811(1.442695e+00), 0(0.000000e+00) -; CM-NEXT: MUL_IEEE T0.Y, KC0[3].Y, literal.x, -; CM-NEXT: MUL_IEEE T0.Z, KC0[3].Z, literal.x, -; CM-NEXT: SETGT * T1.W, literal.y, PV.W, -; CM-NEXT: 1069066811(1.442695e+00), -1023672320(-1.260000e+02) -; CM-NEXT: CNDE T1.Y, PV.W, 0.0, literal.x, -; CM-NEXT: SETGT T1.Z, literal.y, PV.Z, -; CM-NEXT: SETGT * T2.W, literal.y, PV.Y, -; CM-NEXT: 1115684864(6.400000e+01), -1023672320(-1.260000e+02) -; CM-NEXT: CNDE T0.X, PV.W, 0.0, literal.x, -; CM-NEXT: CNDE T2.Y, PV.Z, 0.0, literal.x, -; CM-NEXT: CNDE T2.Z, T1.W, 1.0, literal.y, -; CM-NEXT: ADD * T0.W, T0.W, PV.Y, BS:VEC_120/SCL_212 -; CM-NEXT: 1115684864(6.400000e+01), 528482304(5.421011e-20) -; CM-NEXT: EXP_IEEE T0.X (MASKED), T0.W, -; CM-NEXT: EXP_IEEE T0.Y (MASKED), T0.W, -; CM-NEXT: EXP_IEEE T0.Z (MASKED), T0.W, -; CM-NEXT: EXP_IEEE * T0.W, T0.W, -; CM-NEXT: MUL_IEEE T1.X, PV.W, T2.Z, -; CM-NEXT: CNDE T1.Y, T1.Z, 1.0, literal.x, -; CM-NEXT: ADD_INT T1.Z, KC0[2].Y, literal.y, -; CM-NEXT: ADD * T0.W, T0.Z, T2.Y, BS:VEC_201 -; CM-NEXT: 528482304(5.421011e-20), 8(1.121039e-44) -; CM-NEXT: EXP_IEEE T0.X (MASKED), T0.W, -; CM-NEXT: EXP_IEEE T0.Y (MASKED), T0.W, -; CM-NEXT: EXP_IEEE T0.Z, T0.W, -; CM-NEXT: EXP_IEEE * T0.W (MASKED), T0.W, -; CM-NEXT: LSHR T2.X, T1.Z, literal.x, -; CM-NEXT: MUL_IEEE T3.Y, PV.Z, T1.Y, -; CM-NEXT: CNDE T0.Z, T2.W, 1.0, literal.y, -; CM-NEXT: ADD * T0.W, T0.Y, T0.X, -; CM-NEXT: 
2(2.802597e-45), 528482304(5.421011e-20) -; CM-NEXT: EXP_IEEE T0.X, T0.W, -; CM-NEXT: EXP_IEEE T0.Y (MASKED), T0.W, -; CM-NEXT: EXP_IEEE T0.Z (MASKED), T0.W, -; CM-NEXT: EXP_IEEE * T0.W (MASKED), T0.W, -; CM-NEXT: MUL_IEEE * T3.X, PV.X, T0.Z, -; CM-NEXT: LSHR * T0.X, KC0[2].Y, literal.x, +; CM-NEXT: PAD +; CM-NEXT: ALU clause starting at 6: +; CM-NEXT: AND_INT * T0.W, KC0[3].Y, literal.x, +; CM-NEXT: -4096(nan), 0(0.000000e+00) +; CM-NEXT: MUL_IEEE T0.Z, PV.W, literal.x, +; CM-NEXT: ADD * T1.W, KC0[3].Y, -PV.W, +; CM-NEXT: 1069064192(1.442383e+00), 0(0.000000e+00) +; CM-NEXT: MUL_IEEE T1.Z, PV.W, literal.x, +; CM-NEXT: RNDNE * T2.W, PV.Z, +; CM-NEXT: 967029397(3.122284e-04), 0(0.000000e+00) +; CM-NEXT: TRUNC T2.Z, PV.W, +; CM-NEXT: MULADD_IEEE * T1.W, T1.W, literal.x, PV.Z, +; CM-NEXT: 1069064192(1.442383e+00), 0(0.000000e+00) +; CM-NEXT: MULADD_IEEE T0.Y, T0.W, literal.x, PV.W, +; CM-NEXT: ADD T0.Z, T0.Z, -T2.W, +; CM-NEXT: FLT_TO_INT * T0.W, PV.Z, +; CM-NEXT: 967029397(3.122284e-04), 0(0.000000e+00) +; CM-NEXT: MIN_INT T1.Z, PV.W, literal.x, +; CM-NEXT: ADD * T1.W, PV.Z, PV.Y, +; CM-NEXT: 381(5.338947e-43), 0(0.000000e+00) +; CM-NEXT: EXP_IEEE T0.X, T1.W, +; CM-NEXT: EXP_IEEE T0.Y (MASKED), T1.W, +; CM-NEXT: EXP_IEEE T0.Z (MASKED), T1.W, +; CM-NEXT: EXP_IEEE * T0.W (MASKED), T1.W, +; CM-NEXT: MUL_IEEE T0.Y, PV.X, literal.x, +; CM-NEXT: ADD_INT T0.Z, T1.Z, literal.y, +; CM-NEXT: MAX_INT * T1.W, T0.W, literal.z, +; CM-NEXT: 2130706432(1.701412e+38), -254(nan) +; CM-NEXT: -330(nan), 0(0.000000e+00) +; CM-NEXT: ADD_INT T1.X, T0.W, literal.x, +; CM-NEXT: ADD_INT T1.Y, PV.W, literal.y, +; CM-NEXT: ADD_INT T1.Z, T0.W, literal.z, +; CM-NEXT: SETGT_UINT * T1.W, T0.W, literal.w, +; CM-NEXT: -127(nan), 204(2.858649e-43) +; CM-NEXT: 102(1.429324e-43), -229(nan) +; CM-NEXT: SETGT_UINT T2.X, T0.W, literal.x, +; CM-NEXT: CNDE_INT T1.Y, PV.W, PV.Y, PV.Z, +; CM-NEXT: SETGT_INT T1.Z, T0.W, literal.y, +; CM-NEXT: MUL_IEEE * T2.W, T0.X, literal.z, +; CM-NEXT: 254(3.559298e-43), 
-127(nan) +; CM-NEXT: 209715200(1.972152e-31), 0(0.000000e+00) +; CM-NEXT: MUL_IEEE T3.X, PV.W, literal.x, +; CM-NEXT: CNDE_INT T1.Y, PV.Z, PV.Y, T0.W, +; CM-NEXT: CNDE_INT T0.Z, PV.X, T1.X, T0.Z, +; CM-NEXT: SETGT_INT * T0.W, T0.W, literal.y, +; CM-NEXT: 209715200(1.972152e-31), 127(1.779649e-43) +; CM-NEXT: CNDE_INT T1.Y, PV.W, PV.Y, PV.Z, +; CM-NEXT: CNDE_INT T0.Z, T1.W, PV.X, T2.W, +; CM-NEXT: MUL_IEEE * T1.W, T0.Y, literal.x, +; CM-NEXT: 2130706432(1.701412e+38), 0(0.000000e+00) +; CM-NEXT: CNDE_INT T1.X, T2.X, T0.Y, PV.W, +; CM-NEXT: CNDE_INT T0.Y, T1.Z, PV.Z, T0.X, +; CM-NEXT: LSHL T0.Z, PV.Y, literal.x, +; CM-NEXT: AND_INT * T1.W, KC0[3].Z, literal.y, +; CM-NEXT: 23(3.222986e-44), -4096(nan) +; CM-NEXT: MUL_IEEE T0.X, PV.W, literal.x, +; CM-NEXT: ADD T1.Y, KC0[3].Z, -PV.W, +; CM-NEXT: ADD_INT T0.Z, PV.Z, literal.y, +; CM-NEXT: CNDE_INT * T0.W, T0.W, PV.Y, PV.X, +; CM-NEXT: 1069064192(1.442383e+00), 1065353216(1.000000e+00) +; CM-NEXT: MUL_IEEE T0.Y, PV.W, PV.Z, +; CM-NEXT: MUL_IEEE T0.Z, PV.Y, literal.x, +; CM-NEXT: RNDNE * T0.W, PV.X, +; CM-NEXT: 967029397(3.122284e-04), 0(0.000000e+00) +; CM-NEXT: SETGT T1.X, literal.x, KC0[3].Y, +; CM-NEXT: TRUNC T2.Y, PV.W, +; CM-NEXT: AND_INT T1.Z, KC0[3].W, literal.y, +; CM-NEXT: MULADD_IEEE * T2.W, T1.Y, literal.z, PV.Z, +; CM-NEXT: -1026650416(-1.032789e+02), -4096(nan) +; CM-NEXT: 1069064192(1.442383e+00), 0(0.000000e+00) +; CM-NEXT: MULADD_IEEE T2.X, T1.W, literal.x, PV.W, +; CM-NEXT: MUL_IEEE T1.Y, PV.Z, literal.y, +; CM-NEXT: FLT_TO_INT T0.Z, PV.Y, +; CM-NEXT: ADD * T1.W, KC0[3].W, -PV.Z, +; CM-NEXT: 967029397(3.122284e-04), 1069064192(1.442383e+00) +; CM-NEXT: ADD T0.X, T0.X, -T0.W, +; CM-NEXT: MUL_IEEE T2.Y, PV.W, literal.x, +; CM-NEXT: MAX_INT T2.Z, PV.Z, literal.y, +; CM-NEXT: RNDNE * T0.W, PV.Y, +; CM-NEXT: 967029397(3.122284e-04), -330(nan) +; CM-NEXT: TRUNC T3.X, PV.W, +; CM-NEXT: ADD_INT T3.Y, PV.Z, literal.x, +; CM-NEXT: MULADD_IEEE T2.Z, T1.W, literal.y, PV.Y, +; CM-NEXT: ADD * T1.W, PV.X, T2.X, +; 
CM-NEXT: 204(2.858649e-43), 1069064192(1.442383e+00) +; CM-NEXT: EXP_IEEE T0.X, T1.W, +; CM-NEXT: EXP_IEEE T0.Y (MASKED), T1.W, +; CM-NEXT: EXP_IEEE T0.Z (MASKED), T1.W, +; CM-NEXT: EXP_IEEE * T0.W (MASKED), T1.W, +; CM-NEXT: ADD_INT T2.X, T0.Z, literal.x, +; CM-NEXT: MULADD_IEEE T2.Y, T1.Z, literal.y, T2.Z, BS:VEC_102/SCL_221 +; CM-NEXT: ADD T1.Z, T1.Y, -T0.W, +; CM-NEXT: MUL_IEEE * T0.W, PV.X, literal.z, +; CM-NEXT: 102(1.429324e-43), 967029397(3.122284e-04) +; CM-NEXT: 2130706432(1.701412e+38), 0(0.000000e+00) +; CM-NEXT: SETGT_UINT T4.X, T0.Z, literal.x, +; CM-NEXT: MUL_IEEE T1.Y, PV.W, literal.y, +; CM-NEXT: SETGT_UINT T2.Z, T0.Z, literal.z, +; CM-NEXT: ADD * T1.W, PV.Z, PV.Y, +; CM-NEXT: -229(nan), 2130706432(1.701412e+38) +; CM-NEXT: 254(3.559298e-43), 0(0.000000e+00) +; CM-NEXT: EXP_IEEE T1.X (MASKED), T1.W, +; CM-NEXT: EXP_IEEE T1.Y (MASKED), T1.W, +; CM-NEXT: EXP_IEEE T1.Z, T1.W, +; CM-NEXT: EXP_IEEE * T1.W (MASKED), T1.W, +; CM-NEXT: ALU clause starting at 109: +; CM-NEXT: CNDE_INT T5.X, T2.Z, T0.W, T1.Y, +; CM-NEXT: CNDE_INT T1.Y, T4.X, T3.Y, T2.X, +; CM-NEXT: FLT_TO_INT T3.Z, T3.X, BS:VEC_120/SCL_212 +; CM-NEXT: MUL_IEEE * T0.W, T1.Z, literal.x, BS:VEC_120/SCL_212 +; CM-NEXT: 2130706432(1.701412e+38), 0(0.000000e+00) +; CM-NEXT: SETGT_INT T2.X, T0.Z, literal.x, +; CM-NEXT: MUL_IEEE T2.Y, T0.X, literal.y, +; CM-NEXT: MUL_IEEE T4.Z, PV.W, literal.z, +; CM-NEXT: SETGT_UINT * T1.W, PV.Z, literal.w, +; CM-NEXT: -127(nan), 209715200(1.972152e-31) +; CM-NEXT: 2130706432(1.701412e+38), 254(3.559298e-43) +; CM-NEXT: CNDE_INT T3.X, PV.W, T0.W, PV.Z, +; CM-NEXT: MUL_IEEE T3.Y, PV.Y, literal.x, +; CM-NEXT: CNDE_INT T4.Z, PV.X, T1.Y, T0.Z, +; CM-NEXT: MAX_INT * T0.W, T3.Z, literal.y, +; CM-NEXT: 209715200(1.972152e-31), -330(nan) +; CM-NEXT: ADD_INT T6.X, PV.W, literal.x, +; CM-NEXT: ADD_INT T1.Y, T3.Z, literal.y, +; CM-NEXT: SETGT_UINT T5.Z, T3.Z, literal.z, +; CM-NEXT: MUL_IEEE * T0.W, T1.Z, literal.w, BS:VEC_120/SCL_212 +; CM-NEXT: 204(2.858649e-43), 
102(1.429324e-43) +; CM-NEXT: -229(nan), 209715200(1.972152e-31) +; CM-NEXT: MUL_IEEE T7.X, PV.W, literal.x, +; CM-NEXT: MIN_INT T4.Y, T3.Z, literal.y, +; CM-NEXT: CNDE_INT T6.Z, PV.Z, PV.X, PV.Y, +; CM-NEXT: SETGT_INT * T2.W, T3.Z, literal.z, +; CM-NEXT: 209715200(1.972152e-31), 381(5.338947e-43) +; CM-NEXT: -127(nan), 0(0.000000e+00) +; CM-NEXT: CNDE_INT T6.X, PV.W, PV.Z, T3.Z, +; CM-NEXT: MIN_INT T1.Y, T0.Z, literal.x, +; CM-NEXT: ADD_INT T6.Z, PV.Y, literal.y, +; CM-NEXT: ADD_INT * T3.W, T3.Z, literal.z, BS:VEC_120/SCL_212 +; CM-NEXT: 381(5.338947e-43), -254(nan) +; CM-NEXT: -127(nan), 0(0.000000e+00) +; CM-NEXT: CNDE_INT T8.X, T1.W, PV.W, PV.Z, +; CM-NEXT: SETGT_INT T4.Y, T3.Z, literal.x, +; CM-NEXT: ADD_INT T3.Z, PV.Y, literal.y, +; CM-NEXT: ADD_INT * T1.W, T0.Z, literal.z, BS:VEC_120/SCL_212 +; CM-NEXT: 127(1.779649e-43), -254(nan) +; CM-NEXT: -127(nan), 0(0.000000e+00) +; CM-NEXT: CNDE_INT T9.X, T2.Z, PV.W, PV.Z, +; CM-NEXT: SETGT_INT T1.Y, T0.Z, literal.x, BS:VEC_120/SCL_212 +; CM-NEXT: CNDE_INT T0.Z, PV.Y, T6.X, PV.X, +; CM-NEXT: CNDE_INT * T0.W, T5.Z, T7.X, T0.W, BS:VEC_201 +; CM-NEXT: 127(1.779649e-43), 0(0.000000e+00) +; CM-NEXT: CNDE_INT T6.X, T2.W, PV.W, T1.Z, +; CM-NEXT: LSHL T5.Y, PV.Z, literal.x, +; CM-NEXT: CNDE_INT T0.Z, PV.Y, T4.Z, PV.X, +; CM-NEXT: CNDE_INT * T0.W, T4.X, T3.Y, T2.Y, +; CM-NEXT: 23(3.222986e-44), 0(0.000000e+00) +; CM-NEXT: CNDE_INT T0.X, T2.X, PV.W, T0.X, +; CM-NEXT: LSHL T2.Y, PV.Z, literal.x, +; CM-NEXT: ADD_INT T0.Z, PV.Y, literal.y, +; CM-NEXT: CNDE_INT * T0.W, T4.Y, PV.X, T3.X, BS:VEC_021/SCL_122 +; CM-NEXT: 23(3.222986e-44), 1065353216(1.000000e+00) +; CM-NEXT: MUL_IEEE T2.X, PV.W, PV.Z, +; CM-NEXT: SETGT T3.Y, literal.x, KC0[3].W, +; CM-NEXT: ADD_INT T0.Z, PV.Y, literal.y, +; CM-NEXT: CNDE_INT * T0.W, T1.Y, PV.X, T5.X, +; CM-NEXT: -1026650416(-1.032789e+02), 1065353216(1.000000e+00) +; CM-NEXT: MUL_IEEE T0.X, PV.W, PV.Z, +; CM-NEXT: SETGT T1.Y, literal.x, KC0[3].Z, +; CM-NEXT: CNDE T0.Z, PV.Y, PV.X, 0.0, +; CM-NEXT: 
SETGT * T0.W, KC0[3].W, literal.y, +; CM-NEXT: -1026650416(-1.032789e+02), 1118925336(8.872284e+01) +; CM-NEXT: CNDE T2.X, PV.W, PV.Z, literal.x, +; CM-NEXT: CNDE T1.Y, PV.Y, PV.X, 0.0, +; CM-NEXT: SETGT T0.Z, KC0[3].Z, literal.y, +; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.z, +; CM-NEXT: 2139095040(INF), 1118925336(8.872284e+01) +; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00) +; CM-NEXT: LSHR T0.X, PV.W, literal.x, +; CM-NEXT: CNDE T1.Y, PV.Z, PV.Y, literal.y, +; CM-NEXT: CNDE T0.Z, T1.X, T0.Y, 0.0, +; CM-NEXT: SETGT * T0.W, KC0[3].Y, literal.z, +; CM-NEXT: 2(2.802597e-45), 2139095040(INF) +; CM-NEXT: 1118925336(8.872284e+01), 0(0.000000e+00) +; CM-NEXT: CNDE * T1.X, PV.W, PV.Z, literal.x, +; CM-NEXT: 2139095040(INF), 0(0.000000e+00) +; CM-NEXT: LSHR * T3.X, KC0[2].Y, literal.x, ; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00) %result = call <3 x float> @llvm.exp.v3f32(<3 x float> %in) store <3 x float> %result, ptr addrspace(1) %out @@ -740,367 +1623,946 @@ ; VI-SDAG-LABEL: s_exp_v4f32: ; VI-SDAG: ; %bb.0: ; VI-SDAG-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34 -; VI-SDAG-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 -; VI-SDAG-NEXT: v_mov_b32_e32 v0, 0x3fb8aa3b -; VI-SDAG-NEXT: s_mov_b32 s8, 0xc2fc0000 -; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x42800000 +; VI-SDAG-NEXT: v_mov_b32_e32 v0, 0x3fb8a000 +; VI-SDAG-NEXT: v_mov_b32_e32 v6, 0x42b17218 +; VI-SDAG-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 ; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0) -; VI-SDAG-NEXT: v_mul_f32_e32 v2, s7, v0 -; VI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s8, v2 -; VI-SDAG-NEXT: v_cndmask_b32_e32 v3, 0, v1, vcc -; VI-SDAG-NEXT: v_add_f32_e32 v2, v2, v3 -; VI-SDAG-NEXT: v_mul_f32_e32 v3, s6, v0 -; VI-SDAG-NEXT: v_cmp_gt_f32_e64 s[0:1], s8, v3 -; VI-SDAG-NEXT: v_cndmask_b32_e64 v5, 0, v1, s[0:1] +; VI-SDAG-NEXT: s_and_b32 s2, s7, 0xfffff000 +; VI-SDAG-NEXT: v_mov_b32_e32 v2, s2 +; VI-SDAG-NEXT: v_sub_f32_e32 v2, s7, v2 +; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x39a3b295, v2 +; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v2 +; 
VI-SDAG-NEXT: v_mul_f32_e32 v1, s2, v0 +; VI-SDAG-NEXT: v_add_f32_e32 v2, v2, v4 +; VI-SDAG-NEXT: v_mov_b32_e32 v4, 0x39a3b295 +; VI-SDAG-NEXT: v_rndne_f32_e32 v3, v1 +; VI-SDAG-NEXT: v_mul_f32_e32 v5, s2, v4 +; VI-SDAG-NEXT: v_sub_f32_e32 v1, v1, v3 +; VI-SDAG-NEXT: v_add_f32_e32 v2, v5, v2 +; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 +; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1 +; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v3 +; VI-SDAG-NEXT: s_and_b32 s2, s6, 0xfffff000 +; VI-SDAG-NEXT: v_mov_b32_e32 v7, s2 +; VI-SDAG-NEXT: v_sub_f32_e32 v7, s6, v7 +; VI-SDAG-NEXT: v_ldexp_f32 v1, v1, v2 +; VI-SDAG-NEXT: v_mul_f32_e32 v2, s2, v0 +; VI-SDAG-NEXT: v_mul_f32_e32 v8, 0x39a3b295, v7 +; VI-SDAG-NEXT: v_mul_f32_e32 v7, 0x3fb8a000, v7 +; VI-SDAG-NEXT: v_rndne_f32_e32 v3, v2 +; VI-SDAG-NEXT: v_add_f32_e32 v7, v7, v8 +; VI-SDAG-NEXT: v_mul_f32_e32 v8, s2, v4 +; VI-SDAG-NEXT: v_sub_f32_e32 v2, v2, v3 +; VI-SDAG-NEXT: v_add_f32_e32 v7, v8, v7 +; VI-SDAG-NEXT: v_add_f32_e32 v2, v2, v7 ; VI-SDAG-NEXT: v_exp_f32_e32 v2, v2 -; VI-SDAG-NEXT: v_add_f32_e32 v3, v3, v5 -; VI-SDAG-NEXT: v_exp_f32_e32 v5, v3 -; VI-SDAG-NEXT: v_mov_b32_e32 v4, 0x1f800000 -; VI-SDAG-NEXT: v_cndmask_b32_e32 v3, 1.0, v4, vcc -; VI-SDAG-NEXT: v_mul_f32_e32 v3, v2, v3 -; VI-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, v4, s[0:1] -; VI-SDAG-NEXT: v_mul_f32_e32 v2, v5, v2 -; VI-SDAG-NEXT: v_mul_f32_e32 v5, s5, v0 -; VI-SDAG-NEXT: v_mul_f32_e32 v0, s4, v0 -; VI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s8, v5 -; VI-SDAG-NEXT: v_cmp_gt_f32_e64 s[0:1], s8, v0 -; VI-SDAG-NEXT: v_cndmask_b32_e32 v6, 0, v1, vcc -; VI-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, v1, s[0:1] -; VI-SDAG-NEXT: v_add_f32_e32 v5, v5, v6 -; VI-SDAG-NEXT: v_add_f32_e32 v0, v0, v1 -; VI-SDAG-NEXT: v_exp_f32_e32 v5, v5 +; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v7, v3 +; VI-SDAG-NEXT: v_mov_b32_e32 v5, 0xc2ce8ed0 +; VI-SDAG-NEXT: s_and_b32 s2, s5, 0xfffff000 +; VI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s7, v5 +; VI-SDAG-NEXT: v_mov_b32_e32 v9, s2 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc 
+; VI-SDAG-NEXT: v_mov_b32_e32 v8, 0x7f800000 +; VI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s7, v6 +; VI-SDAG-NEXT: v_sub_f32_e32 v9, s5, v9 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v3, v8, v1, vcc +; VI-SDAG-NEXT: v_ldexp_f32 v1, v2, v7 +; VI-SDAG-NEXT: v_mul_f32_e32 v2, s2, v0 +; VI-SDAG-NEXT: v_mul_f32_e32 v10, 0x39a3b295, v9 +; VI-SDAG-NEXT: v_mul_f32_e32 v9, 0x3fb8a000, v9 +; VI-SDAG-NEXT: v_rndne_f32_e32 v7, v2 +; VI-SDAG-NEXT: v_add_f32_e32 v9, v9, v10 +; VI-SDAG-NEXT: v_mul_f32_e32 v10, s2, v4 +; VI-SDAG-NEXT: v_sub_f32_e32 v2, v2, v7 +; VI-SDAG-NEXT: v_add_f32_e32 v9, v10, v9 +; VI-SDAG-NEXT: v_add_f32_e32 v2, v2, v9 +; VI-SDAG-NEXT: v_exp_f32_e32 v9, v2 +; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v7, v7 +; VI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s6, v5 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; VI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s6, v6 +; VI-SDAG-NEXT: s_and_b32 s2, s4, 0xfffff000 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v2, v8, v1, vcc +; VI-SDAG-NEXT: v_ldexp_f32 v1, v9, v7 +; VI-SDAG-NEXT: v_mov_b32_e32 v9, s2 +; VI-SDAG-NEXT: v_sub_f32_e32 v9, s4, v9 +; VI-SDAG-NEXT: v_mul_f32_e32 v0, s2, v0 +; VI-SDAG-NEXT: v_mul_f32_e32 v10, 0x39a3b295, v9 +; VI-SDAG-NEXT: v_mul_f32_e32 v9, 0x3fb8a000, v9 +; VI-SDAG-NEXT: v_rndne_f32_e32 v7, v0 +; VI-SDAG-NEXT: v_add_f32_e32 v9, v9, v10 +; VI-SDAG-NEXT: v_mul_f32_e32 v4, s2, v4 +; VI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v7 +; VI-SDAG-NEXT: v_add_f32_e32 v4, v4, v9 +; VI-SDAG-NEXT: v_add_f32_e32 v0, v0, v4 ; VI-SDAG-NEXT: v_exp_f32_e32 v0, v0 -; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v4, vcc -; VI-SDAG-NEXT: v_cndmask_b32_e64 v4, 1.0, v4, s[0:1] -; VI-SDAG-NEXT: v_mul_f32_e32 v1, v5, v1 -; VI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v4 -; VI-SDAG-NEXT: v_mov_b32_e32 v5, s3 -; VI-SDAG-NEXT: v_mov_b32_e32 v4, s2 +; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v4, v7 +; VI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s5, v5 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; VI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s5, v6 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, v8, v1, vcc 
+; VI-SDAG-NEXT: v_ldexp_f32 v0, v0, v4 +; VI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v5 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc +; VI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v6 +; VI-SDAG-NEXT: v_mov_b32_e32 v5, s1 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc +; VI-SDAG-NEXT: v_mov_b32_e32 v4, s0 ; VI-SDAG-NEXT: flat_store_dwordx4 v[4:5], v[0:3] ; VI-SDAG-NEXT: s_endpgm ; ; VI-GISEL-LABEL: s_exp_v4f32: ; VI-GISEL: ; %bb.0: ; VI-GISEL-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34 -; VI-GISEL-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 -; VI-GISEL-NEXT: s_mov_b32 s0, 0xc2fc0000 -; VI-GISEL-NEXT: v_mov_b32_e32 v4, 0x42800000 -; VI-GISEL-NEXT: v_mov_b32_e32 v5, 0x1f800000 +; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x3fb8a000 +; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x39a3b295 +; VI-GISEL-NEXT: v_mov_b32_e32 v5, 0x42b17218 +; VI-GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 ; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0) -; VI-GISEL-NEXT: v_mov_b32_e32 v0, s4 -; VI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 -; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s0, v0 -; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v4, vcc -; VI-GISEL-NEXT: v_add_f32_e32 v0, v0, v1 -; VI-GISEL-NEXT: v_mov_b32_e32 v1, s5 -; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1 -; VI-GISEL-NEXT: v_cmp_gt_f32_e64 s[0:1], s0, v1 -; VI-GISEL-NEXT: v_cndmask_b32_e64 v6, 0, v4, s[0:1] +; VI-GISEL-NEXT: s_and_b32 s2, s4, 0xfffff000 +; VI-GISEL-NEXT: v_mov_b32_e32 v0, s2 +; VI-GISEL-NEXT: v_sub_f32_e32 v0, s4, v0 +; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x39a3b295, v0 +; VI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8a000, v0 +; VI-GISEL-NEXT: v_mul_f32_e32 v1, s2, v2 +; VI-GISEL-NEXT: v_add_f32_e32 v0, v0, v4 +; VI-GISEL-NEXT: v_mul_f32_e32 v4, s2, v3 +; VI-GISEL-NEXT: v_add_f32_e32 v0, v4, v0 +; VI-GISEL-NEXT: v_rndne_f32_e32 v4, v1 +; VI-GISEL-NEXT: v_sub_f32_e32 v1, v1, v4 +; VI-GISEL-NEXT: v_add_f32_e32 v0, v1, v0 +; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v1, v4 ; VI-GISEL-NEXT: v_exp_f32_e32 v0, v0 -; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v6 +; 
VI-GISEL-NEXT: s_and_b32 s2, s5, 0xfffff000 +; VI-GISEL-NEXT: v_mul_f32_e32 v6, s2, v2 +; VI-GISEL-NEXT: v_mov_b32_e32 v4, 0xc2ce8ed0 +; VI-GISEL-NEXT: v_ldexp_f32 v0, v0, v1 +; VI-GISEL-NEXT: v_mov_b32_e32 v1, s2 +; VI-GISEL-NEXT: v_sub_f32_e32 v1, s5, v1 +; VI-GISEL-NEXT: v_mul_f32_e32 v7, 0x39a3b295, v1 +; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8a000, v1 +; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v7 +; VI-GISEL-NEXT: v_mul_f32_e32 v7, s2, v3 +; VI-GISEL-NEXT: v_add_f32_e32 v1, v7, v1 +; VI-GISEL-NEXT: v_rndne_f32_e32 v7, v6 +; VI-GISEL-NEXT: v_sub_f32_e32 v6, v6, v7 +; VI-GISEL-NEXT: v_add_f32_e32 v1, v6, v1 +; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v6, v7 ; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1 -; VI-GISEL-NEXT: v_cndmask_b32_e32 v6, 1.0, v5, vcc -; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x3fb8aa3b -; VI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v6 -; VI-GISEL-NEXT: v_cndmask_b32_e64 v6, 1.0, v5, s[0:1] -; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0xc2fc0000 -; VI-GISEL-NEXT: v_mul_f32_e32 v1, v1, v6 -; VI-GISEL-NEXT: v_mul_f32_e32 v6, s6, v2 -; VI-GISEL-NEXT: v_mul_f32_e32 v2, s7, v2 -; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], v2, v3 -; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v6, v3 -; VI-GISEL-NEXT: v_cndmask_b32_e64 v3, 0, v4, s[0:1] -; VI-GISEL-NEXT: v_cndmask_b32_e32 v7, 0, v4, vcc -; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v3 -; VI-GISEL-NEXT: v_add_f32_e32 v6, v6, v7 -; VI-GISEL-NEXT: v_exp_f32_e32 v3, v2 +; VI-GISEL-NEXT: s_and_b32 s2, s6, 0xfffff000 +; VI-GISEL-NEXT: v_mul_f32_e32 v8, s2, v2 +; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s4, v4 +; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v6 +; VI-GISEL-NEXT: v_mov_b32_e32 v6, s2 +; VI-GISEL-NEXT: v_sub_f32_e32 v6, s6, v6 +; VI-GISEL-NEXT: v_mul_f32_e32 v9, 0x39a3b295, v6 +; VI-GISEL-NEXT: v_mul_f32_e32 v6, 0x3fb8a000, v6 +; VI-GISEL-NEXT: v_add_f32_e32 v6, v6, v9 +; VI-GISEL-NEXT: v_mul_f32_e32 v9, s2, v3 +; VI-GISEL-NEXT: v_add_f32_e32 v6, v9, v6 +; VI-GISEL-NEXT: v_rndne_f32_e32 v9, v8 +; VI-GISEL-NEXT: v_sub_f32_e32 v8, v8, v9 +; VI-GISEL-NEXT: 
v_add_f32_e32 v6, v8, v6 +; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v8, v9 ; VI-GISEL-NEXT: v_exp_f32_e32 v6, v6 -; VI-GISEL-NEXT: v_cndmask_b32_e64 v4, 1.0, v5, s[0:1] -; VI-GISEL-NEXT: v_cndmask_b32_e32 v2, 1.0, v5, vcc -; VI-GISEL-NEXT: v_mul_f32_e32 v3, v3, v4 -; VI-GISEL-NEXT: v_mov_b32_e32 v5, s3 -; VI-GISEL-NEXT: v_mul_f32_e32 v2, v6, v2 -; VI-GISEL-NEXT: v_mov_b32_e32 v4, s2 +; VI-GISEL-NEXT: s_and_b32 s2, s7, 0xfffff000 +; VI-GISEL-NEXT: v_mul_f32_e32 v2, s2, v2 +; VI-GISEL-NEXT: v_mul_f32_e32 v3, s2, v3 +; VI-GISEL-NEXT: v_ldexp_f32 v6, v6, v8 +; VI-GISEL-NEXT: v_mov_b32_e32 v8, s2 +; VI-GISEL-NEXT: v_sub_f32_e32 v8, s7, v8 +; VI-GISEL-NEXT: v_mul_f32_e32 v9, 0x39a3b295, v8 +; VI-GISEL-NEXT: v_mul_f32_e32 v8, 0x3fb8a000, v8 +; VI-GISEL-NEXT: v_add_f32_e32 v8, v8, v9 +; VI-GISEL-NEXT: v_add_f32_e32 v3, v3, v8 +; VI-GISEL-NEXT: v_rndne_f32_e32 v8, v2 +; VI-GISEL-NEXT: v_sub_f32_e32 v2, v2, v8 +; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc +; VI-GISEL-NEXT: v_mov_b32_e32 v7, 0x7f800000 +; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s4, v5 +; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v3 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v7, vcc +; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s5, v4 +; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v3, v8 +; VI-GISEL-NEXT: v_exp_f32_e32 v8, v2 +; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc +; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s5, v5 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc +; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s6, v4 +; VI-GISEL-NEXT: v_cndmask_b32_e64 v2, v6, 0, vcc +; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s6, v5 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v2, v2, v7, vcc +; VI-GISEL-NEXT: v_ldexp_f32 v3, v8, v3 +; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s7, v4 +; VI-GISEL-NEXT: v_cndmask_b32_e64 v3, v3, 0, vcc +; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s7, v5 +; VI-GISEL-NEXT: v_mov_b32_e32 v5, s1 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v3, v3, v7, vcc +; VI-GISEL-NEXT: v_mov_b32_e32 v4, s0 ; VI-GISEL-NEXT: flat_store_dwordx4 v[4:5], v[0:3] ; 
VI-GISEL-NEXT: s_endpgm ; ; GFX900-SDAG-LABEL: s_exp_v4f32: ; GFX900-SDAG: ; %bb.0: ; GFX900-SDAG-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34 -; GFX900-SDAG-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 ; GFX900-SDAG-NEXT: v_mov_b32_e32 v0, 0x3fb8aa3b -; GFX900-SDAG-NEXT: s_mov_b32 s8, 0xc2fc0000 -; GFX900-SDAG-NEXT: v_mov_b32_e32 v5, 0x42800000 +; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x32a5705f +; GFX900-SDAG-NEXT: v_mov_b32_e32 v5, 0xc2ce8ed0 +; GFX900-SDAG-NEXT: v_mov_b32_e32 v6, 0x42b17218 ; GFX900-SDAG-NEXT: s_waitcnt lgkmcnt(0) -; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, s7, v0 -; GFX900-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s8, v1 -; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v5, vcc -; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 -; GFX900-SDAG-NEXT: v_mul_f32_e32 v2, s6, v0 -; GFX900-SDAG-NEXT: v_cmp_gt_f32_e64 s[0:1], s8, v2 -; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v3, 0, v5, s[0:1] -; GFX900-SDAG-NEXT: v_exp_f32_e32 v1, v1 -; GFX900-SDAG-NEXT: v_add_f32_e32 v2, v2, v3 +; GFX900-SDAG-NEXT: v_mul_f32_e32 v2, s7, v0 +; GFX900-SDAG-NEXT: v_rndne_f32_e32 v3, v2 +; GFX900-SDAG-NEXT: v_fma_f32 v4, s7, v0, -v2 +; GFX900-SDAG-NEXT: v_sub_f32_e32 v2, v2, v3 +; GFX900-SDAG-NEXT: v_fma_f32 v4, s7, v1, v4 +; GFX900-SDAG-NEXT: v_add_f32_e32 v2, v2, v4 +; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v3, v3 ; GFX900-SDAG-NEXT: v_exp_f32_e32 v2, v2 -; GFX900-SDAG-NEXT: v_mov_b32_e32 v6, 0x1f800000 -; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v3, 1.0, v6, vcc -; GFX900-SDAG-NEXT: v_mul_f32_e32 v3, v1, v3 -; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v1, 1.0, v6, s[0:1] -; GFX900-SDAG-NEXT: v_mul_f32_e32 v2, v2, v1 -; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, s5, v0 -; GFX900-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s8, v1 -; GFX900-SDAG-NEXT: v_mul_f32_e32 v0, s4, v0 -; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v7, 0, v5, vcc -; GFX900-SDAG-NEXT: v_cmp_gt_f32_e64 s[0:1], s8, v0 -; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v1, v7 -; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v5, 0, v5, s[0:1] -; GFX900-SDAG-NEXT: v_exp_f32_e32 v1, v1 -; 
GFX900-SDAG-NEXT: v_add_f32_e32 v0, v0, v5 +; GFX900-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s7, v5 +; GFX900-SDAG-NEXT: v_mov_b32_e32 v9, 0x7f800000 +; GFX900-SDAG-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GFX900-SDAG-NEXT: v_ldexp_f32 v2, v2, v3 +; GFX900-SDAG-NEXT: v_mul_f32_e32 v3, s6, v0 +; GFX900-SDAG-NEXT: v_rndne_f32_e32 v7, v3 +; GFX900-SDAG-NEXT: v_sub_f32_e32 v8, v3, v7 +; GFX900-SDAG-NEXT: v_fma_f32 v3, s6, v0, -v3 +; GFX900-SDAG-NEXT: v_fma_f32 v3, s6, v1, v3 +; GFX900-SDAG-NEXT: v_add_f32_e32 v3, v8, v3 +; GFX900-SDAG-NEXT: v_exp_f32_e32 v8, v3 +; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v7, v7 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc +; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s7, v6 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v3, v9, v2, vcc +; GFX900-SDAG-NEXT: v_ldexp_f32 v2, v8, v7 +; GFX900-SDAG-NEXT: v_mul_f32_e32 v7, s5, v0 +; GFX900-SDAG-NEXT: v_rndne_f32_e32 v8, v7 +; GFX900-SDAG-NEXT: v_sub_f32_e32 v10, v7, v8 +; GFX900-SDAG-NEXT: v_fma_f32 v7, s5, v0, -v7 +; GFX900-SDAG-NEXT: v_fma_f32 v7, s5, v1, v7 +; GFX900-SDAG-NEXT: v_add_f32_e32 v7, v10, v7 +; GFX900-SDAG-NEXT: v_exp_f32_e32 v7, v7 +; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v8, v8 +; GFX900-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s6, v5 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc +; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s6, v6 +; GFX900-SDAG-NEXT: v_ldexp_f32 v7, v7, v8 +; GFX900-SDAG-NEXT: v_mul_f32_e32 v8, s4, v0 +; GFX900-SDAG-NEXT: v_rndne_f32_e32 v10, v8 +; GFX900-SDAG-NEXT: v_fma_f32 v0, s4, v0, -v8 +; GFX900-SDAG-NEXT: v_sub_f32_e32 v11, v8, v10 +; GFX900-SDAG-NEXT: v_fma_f32 v0, s4, v1, v0 +; GFX900-SDAG-NEXT: v_add_f32_e32 v0, v11, v0 ; GFX900-SDAG-NEXT: v_exp_f32_e32 v0, v0 -; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v5, 1.0, v6, vcc -; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, v1, v5 -; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v5, 1.0, v6, s[0:1] +; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v8, v10 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v2, v9, v2, vcc +; GFX900-SDAG-NEXT: v_cmp_nlt_f32_e32 
vcc, s5, v5 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v7, vcc +; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s5, v6 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, v9, v1, vcc +; GFX900-SDAG-NEXT: v_ldexp_f32 v0, v0, v8 +; GFX900-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v5 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc +; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v6 ; GFX900-SDAG-NEXT: v_mov_b32_e32 v4, 0 -; GFX900-SDAG-NEXT: v_mul_f32_e32 v0, v0, v5 -; GFX900-SDAG-NEXT: global_store_dwordx4 v4, v[0:3], s[2:3] +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, v9, v0, vcc +; GFX900-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; GFX900-SDAG-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] ; GFX900-SDAG-NEXT: s_endpgm ; ; GFX900-GISEL-LABEL: s_exp_v4f32: ; GFX900-GISEL: ; %bb.0: ; GFX900-GISEL-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34 -; GFX900-GISEL-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 -; GFX900-GISEL-NEXT: s_mov_b32 s0, 0xc2fc0000 -; GFX900-GISEL-NEXT: v_mov_b32_e32 v4, 0x42800000 -; GFX900-GISEL-NEXT: v_mov_b32_e32 v5, 0x1f800000 +; GFX900-GISEL-NEXT: s_mov_b32 s2, 0x3fb8aa3b +; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x32a5705f +; GFX900-GISEL-NEXT: v_mov_b32_e32 v4, 0xc2ce8ed0 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v5, 0x42b17218 ; GFX900-GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX900-GISEL-NEXT: v_mov_b32_e32 v0, s4 -; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 -; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s0, v0 -; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v4, vcc -; GFX900-GISEL-NEXT: v_add_f32_e32 v0, v0, v1 -; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, s5 -; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1 -; GFX900-GISEL-NEXT: v_cmp_gt_f32_e64 s[0:1], s0, v1 -; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v6, 0, v4, s[0:1] +; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0 +; GFX900-GISEL-NEXT: v_fma_f32 v0, v0, s2, -v1 +; GFX900-GISEL-NEXT: v_rndne_f32_e32 v3, v1 +; GFX900-GISEL-NEXT: v_fma_f32 v0, s4, v2, v0 +; GFX900-GISEL-NEXT: v_sub_f32_e32 v1, v1, v3 +; GFX900-GISEL-NEXT: 
v_add_f32_e32 v0, v1, v0 +; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v1, v3 ; GFX900-GISEL-NEXT: v_exp_f32_e32 v0, v0 -; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v1, v6 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x3fb8aa3b +; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s4, v4 +; GFX900-GISEL-NEXT: s_mov_b32 s2, 0xc2ce8ed0 +; GFX900-GISEL-NEXT: v_ldexp_f32 v0, v0, v1 +; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, s5, v3 +; GFX900-GISEL-NEXT: v_fma_f32 v7, s5, v3, -v1 +; GFX900-GISEL-NEXT: v_rndne_f32_e32 v8, v1 +; GFX900-GISEL-NEXT: v_fma_f32 v7, s5, v2, v7 +; GFX900-GISEL-NEXT: v_sub_f32_e32 v1, v1, v8 +; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v1, v7 +; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v7, v8 ; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1 -; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v6, 1.0, v5, vcc -; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x3fb8aa3b -; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, v0, v6 -; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v6, 1.0, v5, s[0:1] -; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0xc2fc0000 -; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, v1, v6 -; GFX900-GISEL-NEXT: v_mul_f32_e32 v6, s6, v2 -; GFX900-GISEL-NEXT: v_mul_f32_e32 v2, s7, v2 -; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v6, v3 -; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], v2, v3 -; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v7, 0, v4, vcc -; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v3, 0, v4, s[0:1] -; GFX900-GISEL-NEXT: v_add_f32_e32 v6, v6, v7 -; GFX900-GISEL-NEXT: v_add_f32_e32 v2, v2, v3 -; GFX900-GISEL-NEXT: v_exp_f32_e32 v6, v6 -; GFX900-GISEL-NEXT: v_exp_f32_e32 v3, v2 -; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v2, 1.0, v5, vcc -; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v4, 1.0, v5, s[0:1] -; GFX900-GISEL-NEXT: v_mul_f32_e32 v2, v6, v2 -; GFX900-GISEL-NEXT: v_mul_f32_e32 v3, v3, v4 +; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc +; GFX900-GISEL-NEXT: v_mov_b32_e32 v6, 0x7f800000 +; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s4, v5 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v8, s5 +; GFX900-GISEL-NEXT: s_mov_b32 s3, 0x42b17218 +; 
GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc +; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v7 +; GFX900-GISEL-NEXT: v_mul_f32_e32 v7, s6, v3 +; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s2, v8 +; GFX900-GISEL-NEXT: v_fma_f32 v9, s6, v3, -v7 +; GFX900-GISEL-NEXT: v_rndne_f32_e32 v10, v7 +; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc +; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s3, v8 +; GFX900-GISEL-NEXT: v_mul_f32_e32 v8, s7, v3 +; GFX900-GISEL-NEXT: v_fma_f32 v9, s6, v2, v9 +; GFX900-GISEL-NEXT: v_sub_f32_e32 v7, v7, v10 +; GFX900-GISEL-NEXT: v_fma_f32 v3, s7, v3, -v8 +; GFX900-GISEL-NEXT: v_add_f32_e32 v7, v7, v9 +; GFX900-GISEL-NEXT: v_fma_f32 v2, s7, v2, v3 +; GFX900-GISEL-NEXT: v_rndne_f32_e32 v3, v8 +; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v9, v10 +; GFX900-GISEL-NEXT: v_exp_f32_e32 v7, v7 +; GFX900-GISEL-NEXT: v_sub_f32_e32 v8, v8, v3 +; GFX900-GISEL-NEXT: v_add_f32_e32 v2, v8, v2 +; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v3, v3 +; GFX900-GISEL-NEXT: v_exp_f32_e32 v8, v2 +; GFX900-GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v6, vcc +; GFX900-GISEL-NEXT: v_ldexp_f32 v7, v7, v9 +; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s6, v4 +; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v2, v7, 0, vcc +; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s6, v5 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v2, v2, v6, vcc +; GFX900-GISEL-NEXT: v_ldexp_f32 v3, v8, v3 +; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s7, v4 +; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v3, v3, 0, vcc +; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s7, v5 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v3, v3, v6, vcc ; GFX900-GISEL-NEXT: v_mov_b32_e32 v4, 0 -; GFX900-GISEL-NEXT: global_store_dwordx4 v4, v[0:3], s[2:3] +; GFX900-GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX900-GISEL-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] ; GFX900-GISEL-NEXT: s_endpgm ; ; SI-SDAG-LABEL: s_exp_v4f32: ; SI-SDAG: ; %bb.0: ; SI-SDAG-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0xd -; SI-SDAG-NEXT: 
s_load_dwordx2 s[8:9], s[0:1], 0x9 ; SI-SDAG-NEXT: v_mov_b32_e32 v0, 0x3fb8aa3b -; SI-SDAG-NEXT: s_mov_b32 s2, 0xc2fc0000 -; SI-SDAG-NEXT: v_mov_b32_e32 v4, 0x42800000 +; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x32a5705f +; SI-SDAG-NEXT: v_mov_b32_e32 v5, 0x42b17218 +; SI-SDAG-NEXT: v_mov_b32_e32 v8, 0x7f800000 ; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0) -; SI-SDAG-NEXT: v_mul_f32_e32 v1, s7, v0 -; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s2, v1 -; SI-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc -; SI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 -; SI-SDAG-NEXT: v_mul_f32_e32 v2, s6, v0 -; SI-SDAG-NEXT: v_cmp_gt_f32_e64 s[0:1], s2, v2 -; SI-SDAG-NEXT: v_cndmask_b32_e64 v3, 0, v4, s[0:1] -; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1 -; SI-SDAG-NEXT: v_add_f32_e32 v2, v2, v3 +; SI-SDAG-NEXT: v_mul_f32_e32 v2, s7, v0 +; SI-SDAG-NEXT: v_rndne_f32_e32 v3, v2 +; SI-SDAG-NEXT: v_fma_f32 v4, s7, v0, -v2 +; SI-SDAG-NEXT: v_sub_f32_e32 v2, v2, v3 +; SI-SDAG-NEXT: v_fma_f32 v4, s7, v1, v4 +; SI-SDAG-NEXT: v_add_f32_e32 v2, v2, v4 ; SI-SDAG-NEXT: v_exp_f32_e32 v2, v2 -; SI-SDAG-NEXT: v_mov_b32_e32 v5, 0x1f800000 -; SI-SDAG-NEXT: v_cndmask_b32_e32 v3, 1.0, v5, vcc -; SI-SDAG-NEXT: v_mul_f32_e32 v3, v1, v3 -; SI-SDAG-NEXT: v_cndmask_b32_e64 v1, 1.0, v5, s[0:1] -; SI-SDAG-NEXT: v_mul_f32_e32 v2, v2, v1 -; SI-SDAG-NEXT: v_mul_f32_e32 v1, s5, v0 -; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s2, v1 -; SI-SDAG-NEXT: v_mul_f32_e32 v0, s4, v0 -; SI-SDAG-NEXT: v_cndmask_b32_e32 v6, 0, v4, vcc -; SI-SDAG-NEXT: v_cmp_gt_f32_e64 s[0:1], s2, v0 -; SI-SDAG-NEXT: v_add_f32_e32 v1, v1, v6 -; SI-SDAG-NEXT: v_cndmask_b32_e64 v4, 0, v4, s[0:1] -; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1 -; SI-SDAG-NEXT: v_add_f32_e32 v0, v0, v4 +; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v3, v3 +; SI-SDAG-NEXT: v_mov_b32_e32 v4, 0xc2ce8ed0 +; SI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s7, v4 +; SI-SDAG-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SI-SDAG-NEXT: v_ldexp_f32_e32 v2, v2, v3 +; SI-SDAG-NEXT: v_mul_f32_e32 v3, s6, v0 +; SI-SDAG-NEXT: v_rndne_f32_e32 v6, v3 +; 
SI-SDAG-NEXT: v_sub_f32_e32 v7, v3, v6 +; SI-SDAG-NEXT: v_fma_f32 v3, s6, v0, -v3 +; SI-SDAG-NEXT: v_fma_f32 v3, s6, v1, v3 +; SI-SDAG-NEXT: v_add_f32_e32 v3, v7, v3 +; SI-SDAG-NEXT: v_exp_f32_e32 v7, v3 +; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v6, v6 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc +; SI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s7, v5 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v3, v8, v2, vcc +; SI-SDAG-NEXT: v_ldexp_f32_e32 v2, v7, v6 +; SI-SDAG-NEXT: v_mul_f32_e32 v6, s5, v0 +; SI-SDAG-NEXT: v_rndne_f32_e32 v7, v6 +; SI-SDAG-NEXT: v_sub_f32_e32 v9, v6, v7 +; SI-SDAG-NEXT: v_fma_f32 v6, s5, v0, -v6 +; SI-SDAG-NEXT: v_fma_f32 v6, s5, v1, v6 +; SI-SDAG-NEXT: v_add_f32_e32 v6, v9, v6 +; SI-SDAG-NEXT: v_exp_f32_e32 v6, v6 +; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v7, v7 +; SI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s6, v4 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc +; SI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s6, v5 +; SI-SDAG-NEXT: v_ldexp_f32_e32 v6, v6, v7 +; SI-SDAG-NEXT: v_mul_f32_e32 v7, s4, v0 +; SI-SDAG-NEXT: v_rndne_f32_e32 v9, v7 +; SI-SDAG-NEXT: v_fma_f32 v0, s4, v0, -v7 +; SI-SDAG-NEXT: v_sub_f32_e32 v10, v7, v9 +; SI-SDAG-NEXT: v_fma_f32 v0, s4, v1, v0 +; SI-SDAG-NEXT: v_add_f32_e32 v0, v10, v0 ; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0 -; SI-SDAG-NEXT: v_cndmask_b32_e32 v4, 1.0, v5, vcc -; SI-SDAG-NEXT: v_mul_f32_e32 v1, v1, v4 -; SI-SDAG-NEXT: v_cndmask_b32_e64 v4, 1.0, v5, s[0:1] -; SI-SDAG-NEXT: s_mov_b32 s11, 0xf000 -; SI-SDAG-NEXT: s_mov_b32 s10, -1 -; SI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v4 -; SI-SDAG-NEXT: buffer_store_dwordx4 v[0:3], off, s[8:11], 0 +; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v7, v9 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v2, v8, v2, vcc +; SI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s5, v4 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v6, vcc +; SI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s5, v5 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, v8, v1, vcc +; SI-SDAG-NEXT: v_ldexp_f32_e32 v0, v0, v7 +; SI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v4 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v0, 
vcc +; SI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v5 +; SI-SDAG-NEXT: s_mov_b32 s3, 0xf000 +; SI-SDAG-NEXT: s_mov_b32 s2, -1 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc +; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; SI-SDAG-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 ; SI-SDAG-NEXT: s_endpgm ; ; SI-GISEL-LABEL: s_exp_v4f32: ; SI-GISEL: ; %bb.0: ; SI-GISEL-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0xd -; SI-GISEL-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0x9 -; SI-GISEL-NEXT: s_mov_b32 s0, 0xc2fc0000 -; SI-GISEL-NEXT: v_mov_b32_e32 v4, 0x42800000 -; SI-GISEL-NEXT: v_mov_b32_e32 v5, 0x1f800000 +; SI-GISEL-NEXT: s_mov_b32 s2, 0x3fb8aa3b +; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x32a5705f +; SI-GISEL-NEXT: v_mov_b32_e32 v4, 0xc2ce8ed0 +; SI-GISEL-NEXT: v_mov_b32_e32 v5, 0x42b17218 ; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0) ; SI-GISEL-NEXT: v_mov_b32_e32 v0, s4 -; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 -; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s0, v0 -; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v4, vcc -; SI-GISEL-NEXT: v_add_f32_e32 v0, v0, v1 -; SI-GISEL-NEXT: v_mov_b32_e32 v1, s5 -; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1 -; SI-GISEL-NEXT: v_cmp_gt_f32_e64 s[0:1], s0, v1 -; SI-GISEL-NEXT: v_cndmask_b32_e64 v6, 0, v4, s[0:1] +; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0 +; SI-GISEL-NEXT: v_fma_f32 v0, v0, s2, -v1 +; SI-GISEL-NEXT: v_rndne_f32_e32 v3, v1 +; SI-GISEL-NEXT: v_fma_f32 v0, s4, v2, v0 +; SI-GISEL-NEXT: v_sub_f32_e32 v1, v1, v3 +; SI-GISEL-NEXT: v_add_f32_e32 v0, v1, v0 +; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v1, v3 ; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0 -; SI-GISEL-NEXT: v_add_f32_e32 v1, v1, v6 +; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x3fb8aa3b +; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s4, v4 +; SI-GISEL-NEXT: s_mov_b32 s2, 0xc2ce8ed0 +; SI-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v1 +; SI-GISEL-NEXT: v_mul_f32_e32 v1, s5, v3 +; SI-GISEL-NEXT: v_fma_f32 v7, s5, v3, -v1 +; SI-GISEL-NEXT: v_rndne_f32_e32 v8, v1 +; SI-GISEL-NEXT: v_fma_f32 v7, s5, v2, v7 +; SI-GISEL-NEXT: 
v_sub_f32_e32 v1, v1, v8 +; SI-GISEL-NEXT: v_add_f32_e32 v1, v1, v7 +; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v7, v8 ; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1 -; SI-GISEL-NEXT: v_cndmask_b32_e32 v6, 1.0, v5, vcc -; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x3fb8aa3b -; SI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v6 -; SI-GISEL-NEXT: v_cndmask_b32_e64 v6, 1.0, v5, s[0:1] -; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0xc2fc0000 -; SI-GISEL-NEXT: v_mul_f32_e32 v1, v1, v6 -; SI-GISEL-NEXT: v_mul_f32_e32 v6, s6, v2 -; SI-GISEL-NEXT: v_mul_f32_e32 v2, s7, v2 -; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v6, v3 -; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], v2, v3 -; SI-GISEL-NEXT: v_cndmask_b32_e32 v7, 0, v4, vcc -; SI-GISEL-NEXT: v_cndmask_b32_e64 v3, 0, v4, s[0:1] -; SI-GISEL-NEXT: v_add_f32_e32 v6, v6, v7 -; SI-GISEL-NEXT: v_add_f32_e32 v2, v2, v3 -; SI-GISEL-NEXT: v_exp_f32_e32 v6, v6 -; SI-GISEL-NEXT: v_exp_f32_e32 v3, v2 -; SI-GISEL-NEXT: v_cndmask_b32_e32 v2, 1.0, v5, vcc -; SI-GISEL-NEXT: v_cndmask_b32_e64 v4, 1.0, v5, s[0:1] -; SI-GISEL-NEXT: v_mul_f32_e32 v2, v6, v2 -; SI-GISEL-NEXT: v_mul_f32_e32 v3, v3, v4 -; SI-GISEL-NEXT: s_mov_b32 s10, -1 -; SI-GISEL-NEXT: s_mov_b32 s11, 0xf000 -; SI-GISEL-NEXT: buffer_store_dwordx4 v[0:3], off, s[8:11], 0 +; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc +; SI-GISEL-NEXT: v_mov_b32_e32 v6, 0x7f800000 +; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s4, v5 +; SI-GISEL-NEXT: v_mov_b32_e32 v8, s5 +; SI-GISEL-NEXT: s_mov_b32 s3, 0x42b17218 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc +; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v7 +; SI-GISEL-NEXT: v_mul_f32_e32 v7, s6, v3 +; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s2, v8 +; SI-GISEL-NEXT: v_fma_f32 v9, s6, v3, -v7 +; SI-GISEL-NEXT: v_rndne_f32_e32 v10, v7 +; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc +; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s3, v8 +; SI-GISEL-NEXT: v_mul_f32_e32 v8, s7, v3 +; SI-GISEL-NEXT: v_fma_f32 v9, s6, v2, v9 +; SI-GISEL-NEXT: v_sub_f32_e32 v7, v7, v10 +; SI-GISEL-NEXT: v_fma_f32 v3, s7, v3, -v8 +; 
SI-GISEL-NEXT: v_add_f32_e32 v7, v7, v9 +; SI-GISEL-NEXT: v_fma_f32 v2, s7, v2, v3 +; SI-GISEL-NEXT: v_rndne_f32_e32 v3, v8 +; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v9, v10 +; SI-GISEL-NEXT: v_exp_f32_e32 v7, v7 +; SI-GISEL-NEXT: v_sub_f32_e32 v8, v8, v3 +; SI-GISEL-NEXT: v_add_f32_e32 v2, v8, v2 +; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v3, v3 +; SI-GISEL-NEXT: v_exp_f32_e32 v8, v2 +; SI-GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v6, vcc +; SI-GISEL-NEXT: v_ldexp_f32_e32 v7, v7, v9 +; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s6, v4 +; SI-GISEL-NEXT: v_cndmask_b32_e64 v2, v7, 0, vcc +; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s6, v5 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v2, v2, v6, vcc +; SI-GISEL-NEXT: v_ldexp_f32_e32 v3, v8, v3 +; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s7, v4 +; SI-GISEL-NEXT: v_cndmask_b32_e64 v3, v3, 0, vcc +; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s7, v5 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v3, v3, v6, vcc +; SI-GISEL-NEXT: s_mov_b32 s2, -1 +; SI-GISEL-NEXT: s_mov_b32 s3, 0xf000 +; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0) +; SI-GISEL-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 ; SI-GISEL-NEXT: s_endpgm ; ; R600-LABEL: s_exp_v4f32: ; R600: ; %bb.0: -; R600-NEXT: ALU 40, @4, KC0[CB0:0-32], KC1[] -; R600-NEXT: MEM_RAT_CACHELESS STORE_RAW T3.XYZW, T0.X, 1 +; R600-NEXT: ALU 98, @6, KC0[CB0:0-32], KC1[] +; R600-NEXT: ALU 98, @105, KC0[CB0:0-32], KC1[] +; R600-NEXT: ALU 24, @204, KC0[CB0:0-32], KC1[] +; R600-NEXT: MEM_RAT_CACHELESS STORE_RAW T1.XYZW, T0.X, 1 ; R600-NEXT: CF_END ; R600-NEXT: PAD -; R600-NEXT: ALU clause starting at 4: -; R600-NEXT: MUL_IEEE * T0.W, KC0[4].X, literal.x, -; R600-NEXT: 1069066811(1.442695e+00), 0(0.000000e+00) -; R600-NEXT: SETGT T1.W, literal.x, PV.W, -; R600-NEXT: MUL_IEEE * T2.W, KC0[3].W, literal.y, -; R600-NEXT: -1023672320(-1.260000e+02), 1069066811(1.442695e+00) -; R600-NEXT: SETGT T0.Z, literal.x, PS, -; R600-NEXT: MUL_IEEE T3.W, KC0[3].Z, literal.y, -; R600-NEXT: CNDE * T4.W, PV.W, 0.0, 
literal.z, -; R600-NEXT: -1023672320(-1.260000e+02), 1069066811(1.442695e+00) -; R600-NEXT: 1115684864(6.400000e+01), 0(0.000000e+00) -; R600-NEXT: ADD T0.Y, T0.W, PS, -; R600-NEXT: SETGT T1.Z, literal.x, PV.W, -; R600-NEXT: MUL_IEEE T0.W, KC0[3].Y, literal.y, -; R600-NEXT: CNDE * T4.W, PV.Z, 0.0, literal.z, -; R600-NEXT: -1023672320(-1.260000e+02), 1069066811(1.442695e+00) -; R600-NEXT: 1115684864(6.400000e+01), 0(0.000000e+00) -; R600-NEXT: ADD T0.X, T2.W, PS, -; R600-NEXT: CNDE T1.Y, T1.W, 1.0, literal.x, BS:VEC_120/SCL_212 -; R600-NEXT: SETGT T2.Z, literal.y, PV.W, -; R600-NEXT: CNDE T1.W, PV.Z, 0.0, literal.z, -; R600-NEXT: EXP_IEEE * T0.Y, PV.Y, -; R600-NEXT: 528482304(5.421011e-20), -1023672320(-1.260000e+02) -; R600-NEXT: 1115684864(6.400000e+01), 0(0.000000e+00) -; R600-NEXT: ADD T1.X, T3.W, PV.W, -; R600-NEXT: CNDE T2.Y, T0.Z, 1.0, literal.x, -; R600-NEXT: CNDE T0.Z, PV.Z, 0.0, literal.y, -; R600-NEXT: MUL_IEEE T3.W, PS, PV.Y, -; R600-NEXT: EXP_IEEE * T0.X, PV.X, -; R600-NEXT: 528482304(5.421011e-20), 1115684864(6.400000e+01) -; R600-NEXT: ADD T0.Y, T0.W, PV.Z, -; R600-NEXT: MUL_IEEE T3.Z, PS, PV.Y, -; R600-NEXT: CNDE T0.W, T1.Z, 1.0, literal.x, -; R600-NEXT: EXP_IEEE * T0.X, PV.X, -; R600-NEXT: 528482304(5.421011e-20), 0(0.000000e+00) -; R600-NEXT: MUL_IEEE T3.Y, PS, PV.W, -; R600-NEXT: CNDE T0.W, T2.Z, 1.0, literal.x, -; R600-NEXT: EXP_IEEE * T0.X, PV.Y, -; R600-NEXT: 528482304(5.421011e-20), 0(0.000000e+00) -; R600-NEXT: MUL_IEEE T3.X, PS, PV.W, -; R600-NEXT: LSHR * T0.X, KC0[2].Y, literal.x, -; R600-NEXT: 2(2.802597e-45), 0(0.000000e+00) +; R600-NEXT: ALU clause starting at 6: +; R600-NEXT: AND_INT * T0.W, KC0[3].Z, literal.x, +; R600-NEXT: -4096(nan), 0(0.000000e+00) +; R600-NEXT: ADD T1.W, KC0[3].Z, -PV.W, +; R600-NEXT: MUL_IEEE * T2.W, PV.W, literal.x, +; R600-NEXT: 1069064192(1.442383e+00), 0(0.000000e+00) +; R600-NEXT: RNDNE T3.W, PS, +; R600-NEXT: MUL_IEEE * T4.W, PV.W, literal.x, +; R600-NEXT: 967029397(3.122284e-04), 0(0.000000e+00) +; 
R600-NEXT: MULADD_IEEE T1.W, T1.W, literal.x, PS, +; R600-NEXT: TRUNC * T4.W, PV.W, +; R600-NEXT: 1069064192(1.442383e+00), 0(0.000000e+00) +; R600-NEXT: FLT_TO_INT T0.Z, PS, +; R600-NEXT: MULADD_IEEE T0.W, T0.W, literal.x, PV.W, +; R600-NEXT: ADD * T1.W, T2.W, -T3.W, +; R600-NEXT: 967029397(3.122284e-04), 0(0.000000e+00) +; R600-NEXT: ADD T1.Z, PS, PV.W, +; R600-NEXT: MAX_INT T0.W, PV.Z, literal.x, +; R600-NEXT: MIN_INT * T1.W, PV.Z, literal.y, +; R600-NEXT: -330(nan), 381(5.338947e-43) +; R600-NEXT: ADD_INT T0.X, PS, literal.x, +; R600-NEXT: ADD_INT T0.Y, PV.W, literal.y, +; R600-NEXT: ADD_INT T2.Z, T0.Z, literal.z, +; R600-NEXT: SETGT_UINT T0.W, T0.Z, literal.w, +; R600-NEXT: EXP_IEEE * T1.X, PV.Z, +; R600-NEXT: -254(nan), 204(2.858649e-43) +; R600-NEXT: 102(1.429324e-43), -229(nan) +; R600-NEXT: ADD_INT T2.X, T0.Z, literal.x, +; R600-NEXT: SETGT_UINT T1.Y, T0.Z, literal.y, +; R600-NEXT: CNDE_INT T1.Z, PV.W, PV.Y, PV.Z, +; R600-NEXT: SETGT_INT T1.W, T0.Z, literal.x, +; R600-NEXT: MUL_IEEE * T2.W, PS, literal.z, +; R600-NEXT: -127(nan), 254(3.559298e-43) +; R600-NEXT: 209715200(1.972152e-31), 0(0.000000e+00) +; R600-NEXT: MUL_IEEE T3.X, T1.X, literal.x, +; R600-NEXT: MUL_IEEE T0.Y, PS, literal.y, +; R600-NEXT: CNDE_INT T1.Z, PV.W, PV.Z, T0.Z, +; R600-NEXT: CNDE_INT T3.W, PV.Y, PV.X, T0.X, +; R600-NEXT: SETGT_INT * T4.W, T0.Z, literal.z, +; R600-NEXT: 2130706432(1.701412e+38), 209715200(1.972152e-31) +; R600-NEXT: 127(1.779649e-43), 0(0.000000e+00) +; R600-NEXT: AND_INT T2.Y, KC0[4].X, literal.x, +; R600-NEXT: CNDE_INT T0.Z, PS, PV.Z, PV.W, +; R600-NEXT: CNDE_INT T0.W, T0.W, PV.Y, T2.W, +; R600-NEXT: MUL_IEEE * T2.W, PV.X, literal.y, +; R600-NEXT: -4096(nan), 2130706432(1.701412e+38) +; R600-NEXT: CNDE_INT T0.X, T1.Y, T3.X, PS, +; R600-NEXT: CNDE_INT T0.Y, T1.W, PV.W, T1.X, +; R600-NEXT: LSHL T0.Z, PV.Z, literal.x, +; R600-NEXT: ADD T0.W, KC0[4].X, -PV.Y, +; R600-NEXT: MUL_IEEE * T1.W, PV.Y, literal.y, +; R600-NEXT: 23(3.222986e-44), 1069064192(1.442383e+00) +; 
R600-NEXT: RNDNE T1.Y, PS, +; R600-NEXT: MUL_IEEE T1.Z, PV.W, literal.x, +; R600-NEXT: ADD_INT T2.W, PV.Z, literal.y, +; R600-NEXT: CNDE_INT * T3.W, T4.W, PV.Y, PV.X, +; R600-NEXT: 967029397(3.122284e-04), 1065353216(1.000000e+00) +; R600-NEXT: MUL_IEEE T0.Y, PS, PV.W, +; R600-NEXT: AND_INT T0.Z, KC0[3].W, literal.x, +; R600-NEXT: MULADD_IEEE T0.W, T0.W, literal.y, PV.Z, +; R600-NEXT: TRUNC * T2.W, PV.Y, +; R600-NEXT: -4096(nan), 1069064192(1.442383e+00) +; R600-NEXT: SETGT T0.X, literal.x, KC0[3].Z, +; R600-NEXT: FLT_TO_INT T3.Y, PS, +; R600-NEXT: MULADD_IEEE T1.Z, T2.Y, literal.y, PV.W, +; R600-NEXT: ADD T0.W, T1.W, -T1.Y, +; R600-NEXT: MUL_IEEE * T1.W, PV.Z, literal.z, +; R600-NEXT: -1026650416(-1.032789e+02), 967029397(3.122284e-04) +; R600-NEXT: 1069064192(1.442383e+00), 0(0.000000e+00) +; R600-NEXT: RNDNE T1.X, PS, +; R600-NEXT: AND_INT T1.Y, KC0[3].Y, literal.x, +; R600-NEXT: ADD T1.Z, PV.W, PV.Z, +; R600-NEXT: MAX_INT T0.W, PV.Y, literal.y, +; R600-NEXT: MIN_INT * T2.W, PV.Y, literal.z, +; R600-NEXT: -4096(nan), -330(nan) +; R600-NEXT: 381(5.338947e-43), 0(0.000000e+00) +; R600-NEXT: ADD_INT T2.X, PS, literal.x, +; R600-NEXT: ADD_INT T2.Y, PV.W, literal.y, +; R600-NEXT: ADD_INT T2.Z, T3.Y, literal.z, +; R600-NEXT: SETGT_UINT T0.W, T3.Y, literal.w, +; R600-NEXT: EXP_IEEE * T1.Z, PV.Z, +; R600-NEXT: -254(nan), 204(2.858649e-43) +; R600-NEXT: 102(1.429324e-43), -229(nan) +; R600-NEXT: ADD_INT T3.X, T3.Y, literal.x, +; R600-NEXT: SETGT_UINT T4.Y, T3.Y, literal.y, +; R600-NEXT: CNDE_INT T2.Z, PV.W, PV.Y, PV.Z, +; R600-NEXT: SETGT_INT T2.W, T3.Y, literal.x, +; R600-NEXT: MUL_IEEE * T3.W, PS, literal.z, +; R600-NEXT: -127(nan), 254(3.559298e-43) +; R600-NEXT: 209715200(1.972152e-31), 0(0.000000e+00) +; R600-NEXT: MUL_IEEE T4.X, T1.Z, literal.x, +; R600-NEXT: MUL_IEEE T2.Y, PS, literal.y, +; R600-NEXT: CNDE_INT T2.Z, PV.W, PV.Z, T3.Y, +; R600-NEXT: CNDE_INT T4.W, PV.Y, PV.X, T2.X, +; R600-NEXT: SETGT_INT * T5.W, T3.Y, literal.z, +; R600-NEXT: 
2130706432(1.701412e+38), 209715200(1.972152e-31) +; R600-NEXT: 127(1.779649e-43), 0(0.000000e+00) +; R600-NEXT: ADD T2.X, KC0[3].W, -T0.Z, +; R600-NEXT: CNDE_INT T3.Y, PS, PV.Z, PV.W, +; R600-NEXT: CNDE_INT * T2.Z, T0.W, PV.Y, T3.W, +; R600-NEXT: ALU clause starting at 105: +; R600-NEXT: MUL_IEEE T0.W, T4.X, literal.x, +; R600-NEXT: ADD * T3.W, KC0[3].Y, -T1.Y, +; R600-NEXT: 2130706432(1.701412e+38), 0(0.000000e+00) +; R600-NEXT: MUL_IEEE T3.X, PS, literal.x, +; R600-NEXT: MUL_IEEE T2.Y, T1.Y, literal.y, +; R600-NEXT: CNDE_INT T3.Z, T4.Y, T4.X, PV.W, BS:VEC_120/SCL_212 +; R600-NEXT: CNDE_INT T0.W, T2.W, T2.Z, T1.Z, +; R600-NEXT: LSHL * T2.W, T3.Y, literal.z, +; R600-NEXT: 967029397(3.122284e-04), 1069064192(1.442383e+00) +; R600-NEXT: 23(3.222986e-44), 0(0.000000e+00) +; R600-NEXT: ADD_INT T4.X, PS, literal.x, +; R600-NEXT: CNDE_INT T3.Y, T5.W, PV.W, PV.Z, +; R600-NEXT: RNDNE T1.Z, PV.Y, +; R600-NEXT: MULADD_IEEE T0.W, T3.W, literal.y, PV.X, BS:VEC_120/SCL_212 +; R600-NEXT: MUL_IEEE * T2.W, T2.X, literal.z, +; R600-NEXT: 1065353216(1.000000e+00), 1069064192(1.442383e+00) +; R600-NEXT: 967029397(3.122284e-04), 0(0.000000e+00) +; R600-NEXT: MULADD_IEEE T2.X, T2.X, literal.x, PS, +; R600-NEXT: MULADD_IEEE T1.Y, T1.Y, literal.y, PV.W, +; R600-NEXT: ADD T2.Z, T2.Y, -PV.Z, BS:VEC_120/SCL_212 +; R600-NEXT: MUL_IEEE T0.W, PV.Y, PV.X, +; R600-NEXT: SETGT * T2.W, literal.z, KC0[4].X, +; R600-NEXT: 1069064192(1.442383e+00), 967029397(3.122284e-04) +; R600-NEXT: -1026650416(-1.032789e+02), 0(0.000000e+00) +; R600-NEXT: CNDE T3.X, PS, PV.W, 0.0, +; R600-NEXT: ADD T1.Y, PV.Z, PV.Y, +; R600-NEXT: TRUNC T1.Z, T1.Z, +; R600-NEXT: MULADD_IEEE T0.W, T0.Z, literal.x, PV.X, BS:VEC_120/SCL_212 +; R600-NEXT: ADD * T1.W, T1.W, -T1.X, +; R600-NEXT: 967029397(3.122284e-04), 0(0.000000e+00) +; R600-NEXT: SETGT T2.X, KC0[4].X, literal.x, +; R600-NEXT: ADD T2.Y, PS, PV.W, +; R600-NEXT: FLT_TO_INT T0.Z, PV.Z, +; R600-NEXT: TRUNC T0.W, T1.X, +; R600-NEXT: EXP_IEEE * T1.X, PV.Y, +; R600-NEXT: 
1118925336(8.872284e+01), 0(0.000000e+00) +; R600-NEXT: MUL_IEEE T4.X, PS, literal.x, +; R600-NEXT: FLT_TO_INT T1.Y, PV.W, +; R600-NEXT: MAX_INT T1.Z, PV.Z, literal.y, +; R600-NEXT: MUL_IEEE T0.W, PS, literal.z, +; R600-NEXT: EXP_IEEE * T1.W, PV.Y, +; R600-NEXT: 2130706432(1.701412e+38), -330(nan) +; R600-NEXT: 209715200(1.972152e-31), 0(0.000000e+00) +; R600-NEXT: MUL_IEEE T5.X, PV.W, literal.x, +; R600-NEXT: MUL_IEEE T2.Y, PS, literal.x, +; R600-NEXT: ADD_INT T1.Z, PV.Z, literal.y, +; R600-NEXT: ADD_INT T2.W, T0.Z, literal.z, +; R600-NEXT: MAX_INT * T3.W, PV.Y, literal.w, +; R600-NEXT: 209715200(1.972152e-31), 204(2.858649e-43) +; R600-NEXT: 102(1.429324e-43), -330(nan) +; R600-NEXT: SETGT_UINT T6.X, T0.Z, literal.x, +; R600-NEXT: ADD_INT T3.Y, PS, literal.y, +; R600-NEXT: ADD_INT T2.Z, T1.Y, literal.z, +; R600-NEXT: SETGT_UINT T3.W, T1.Y, literal.x, +; R600-NEXT: MIN_INT * T4.W, T1.Y, literal.w, +; R600-NEXT: -229(nan), 204(2.858649e-43) +; R600-NEXT: 102(1.429324e-43), 381(5.338947e-43) +; R600-NEXT: ADD_INT T7.X, PS, literal.x, +; R600-NEXT: ADD_INT T4.Y, T1.Y, literal.y, +; R600-NEXT: SETGT_UINT T3.Z, T1.Y, literal.z, +; R600-NEXT: CNDE_INT T4.W, PV.W, PV.Y, PV.Z, +; R600-NEXT: SETGT_INT * T5.W, T1.Y, literal.y, +; R600-NEXT: -254(nan), -127(nan) +; R600-NEXT: 254(3.559298e-43), 0(0.000000e+00) +; R600-NEXT: CNDE_INT T8.X, PS, PV.W, T1.Y, +; R600-NEXT: CNDE_INT T3.Y, PV.Z, PV.Y, PV.X, +; R600-NEXT: SETGT_INT T2.Z, T1.Y, literal.x, +; R600-NEXT: CNDE_INT T2.W, T6.X, T1.Z, T2.W, +; R600-NEXT: SETGT_INT * T4.W, T0.Z, literal.y, +; R600-NEXT: 127(1.779649e-43), -127(nan) +; R600-NEXT: CNDE_INT T7.X, PS, PV.W, T0.Z, +; R600-NEXT: CNDE_INT T1.Y, PV.Z, PV.X, PV.Y, +; R600-NEXT: MIN_INT T1.Z, T0.Z, literal.x, +; R600-NEXT: MUL_IEEE T2.W, T1.W, literal.y, +; R600-NEXT: MUL_IEEE * T6.W, T2.Y, literal.z, +; R600-NEXT: 381(5.338947e-43), 2130706432(1.701412e+38) +; R600-NEXT: 209715200(1.972152e-31), 0(0.000000e+00) +; R600-NEXT: CNDE_INT T8.X, T3.W, PS, T2.Y, +; 
R600-NEXT: MUL_IEEE T2.Y, PV.W, literal.x, +; R600-NEXT: ADD_INT T1.Z, PV.Z, literal.y, +; R600-NEXT: ADD_INT T3.W, T0.Z, literal.z, +; R600-NEXT: SETGT_UINT * T6.W, T0.Z, literal.w, +; R600-NEXT: 2130706432(1.701412e+38), -254(nan) +; R600-NEXT: -127(nan), 254(3.559298e-43) +; R600-NEXT: CNDE_INT T9.X, PS, PV.W, PV.Z, +; R600-NEXT: SETGT_INT T3.Y, T0.Z, literal.x, +; R600-NEXT: CNDE_INT T0.Z, T3.Z, T2.W, PV.Y, BS:VEC_120/SCL_212 +; R600-NEXT: CNDE_INT T1.W, T5.W, PV.X, T1.W, BS:VEC_021/SCL_122 +; R600-NEXT: LSHL * T2.W, T1.Y, literal.y, +; R600-NEXT: 127(1.779649e-43), 23(3.222986e-44) +; R600-NEXT: ADD_INT T8.X, PS, literal.x, +; R600-NEXT: CNDE_INT T1.Y, T2.Z, PV.W, PV.Z, +; R600-NEXT: CNDE_INT T0.Z, PV.Y, T7.X, PV.X, +; R600-NEXT: CNDE_INT * T0.W, T6.X, T5.X, T0.W, BS:VEC_021/SCL_122 +; R600-NEXT: 1065353216(1.000000e+00), 0(0.000000e+00) +; R600-NEXT: MUL_IEEE * T1.W, T4.X, literal.x, +; R600-NEXT: 2130706432(1.701412e+38), 0(0.000000e+00) +; R600-NEXT: CNDE_INT T4.X, T6.W, T4.X, PV.W, +; R600-NEXT: CNDE_INT * T2.Y, T4.W, T0.W, T1.X, BS:VEC_120/SCL_212 +; R600-NEXT: ALU clause starting at 204: +; R600-NEXT: LSHL T0.Z, T0.Z, literal.x, +; R600-NEXT: MUL_IEEE T0.W, T1.Y, T8.X, +; R600-NEXT: SETGT * T1.W, literal.y, KC0[3].W, +; R600-NEXT: 23(3.222986e-44), -1026650416(-1.032789e+02) +; R600-NEXT: CNDE T1.X, PS, PV.W, 0.0, +; R600-NEXT: SETGT T1.Y, KC0[3].W, literal.x, +; R600-NEXT: ADD_INT T0.Z, PV.Z, literal.y, +; R600-NEXT: CNDE_INT T0.W, T3.Y, T2.Y, T4.X, BS:VEC_120/SCL_212 +; R600-NEXT: CNDE * T1.W, T2.X, T3.X, literal.z, +; R600-NEXT: 1118925336(8.872284e+01), 1065353216(1.000000e+00) +; R600-NEXT: 2139095040(INF), 0(0.000000e+00) +; R600-NEXT: MUL_IEEE T2.X, PV.W, PV.Z, +; R600-NEXT: SETGT T2.Y, literal.x, KC0[3].Y, +; R600-NEXT: CNDE T1.Z, PV.Y, PV.X, literal.y, +; R600-NEXT: CNDE T0.W, T0.X, T0.Y, 0.0, +; R600-NEXT: SETGT * T2.W, KC0[3].Z, literal.z, +; R600-NEXT: -1026650416(-1.032789e+02), 2139095040(INF) +; R600-NEXT: 1118925336(8.872284e+01), 
0(0.000000e+00) +; R600-NEXT: CNDE T1.Y, PS, PV.W, literal.x, +; R600-NEXT: CNDE T0.W, PV.Y, PV.X, 0.0, +; R600-NEXT: SETGT * T2.W, KC0[3].Y, literal.y, +; R600-NEXT: 2139095040(INF), 1118925336(8.872284e+01) +; R600-NEXT: CNDE T1.X, PS, PV.W, literal.x, +; R600-NEXT: LSHR * T0.X, KC0[2].Y, literal.y, +; R600-NEXT: 2139095040(INF), 2(2.802597e-45) ; ; CM-LABEL: s_exp_v4f32: ; CM: ; %bb.0: -; CM-NEXT: ALU 49, @4, KC0[CB0:0-32], KC1[] -; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T2, T0.X +; CM-NEXT: ALU 97, @6, KC0[CB0:0-32], KC1[] +; CM-NEXT: ALU 100, @104, KC0[CB0:0-32], KC1[] +; CM-NEXT: ALU 36, @205, KC0[CB0:0-32], KC1[] +; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T0, T1.X ; CM-NEXT: CF_END ; CM-NEXT: PAD -; CM-NEXT: ALU clause starting at 4: -; CM-NEXT: MUL_IEEE T0.Z, KC0[3].Z, literal.x, -; CM-NEXT: MUL_IEEE * T0.W, KC0[4].X, literal.x, -; CM-NEXT: 1069066811(1.442695e+00), 0(0.000000e+00) -; CM-NEXT: MUL_IEEE T0.Y, KC0[3].W, literal.x, -; CM-NEXT: SETGT T1.Z, literal.y, PV.W, -; CM-NEXT: SETGT * T1.W, literal.y, PV.Z, -; CM-NEXT: 1069066811(1.442695e+00), -1023672320(-1.260000e+02) -; CM-NEXT: CNDE T1.Y, PV.W, 0.0, literal.x, -; CM-NEXT: CNDE T2.Z, PV.Z, 0.0, literal.x, -; CM-NEXT: SETGT * T2.W, literal.y, PV.Y, -; CM-NEXT: 1115684864(6.400000e+01), -1023672320(-1.260000e+02) -; CM-NEXT: CNDE T0.X, T1.Z, 1.0, literal.x, -; CM-NEXT: CNDE T2.Y, PV.W, 0.0, literal.y, -; CM-NEXT: MUL_IEEE T1.Z, KC0[3].Y, literal.z, -; CM-NEXT: ADD * T0.W, T0.W, PV.Z, -; CM-NEXT: 528482304(5.421011e-20), 1115684864(6.400000e+01) -; CM-NEXT: 1069066811(1.442695e+00), 0(0.000000e+00) +; CM-NEXT: ALU clause starting at 6: +; CM-NEXT: AND_INT * T0.W, KC0[3].Y, literal.x, +; CM-NEXT: -4096(nan), 0(0.000000e+00) +; CM-NEXT: ADD * T1.W, KC0[3].Y, -PV.W, +; CM-NEXT: MUL_IEEE T0.Y, PV.W, literal.x, +; CM-NEXT: MUL_IEEE T0.Z, T0.W, literal.y, +; CM-NEXT: AND_INT * T2.W, KC0[3].W, literal.z, +; CM-NEXT: 967029397(3.122284e-04), 1069064192(1.442383e+00) +; CM-NEXT: -4096(nan), 0(0.000000e+00) +; 
CM-NEXT: ADD T1.Y, KC0[3].W, -PV.W, +; CM-NEXT: RNDNE T1.Z, PV.Z, +; CM-NEXT: MULADD_IEEE * T1.W, T1.W, literal.x, PV.Y, +; CM-NEXT: 1069064192(1.442383e+00), 0(0.000000e+00) +; CM-NEXT: MULADD_IEEE T0.X, T0.W, literal.x, PV.W, +; CM-NEXT: ADD T0.Y, T0.Z, -PV.Z, +; CM-NEXT: MUL_IEEE T0.Z, PV.Y, literal.x, +; CM-NEXT: MUL_IEEE * T0.W, T2.W, literal.y, BS:VEC_120/SCL_212 +; CM-NEXT: 967029397(3.122284e-04), 1069064192(1.442383e+00) +; CM-NEXT: TRUNC T1.X, T1.Z, +; CM-NEXT: RNDNE T2.Y, PV.W, +; CM-NEXT: MULADD_IEEE T0.Z, T1.Y, literal.x, PV.Z, +; CM-NEXT: ADD * T1.W, PV.Y, PV.X, +; CM-NEXT: 1069064192(1.442383e+00), 0(0.000000e+00) +; CM-NEXT: EXP_IEEE T0.X, T1.W, +; CM-NEXT: EXP_IEEE T0.Y (MASKED), T1.W, +; CM-NEXT: EXP_IEEE T0.Z (MASKED), T1.W, +; CM-NEXT: EXP_IEEE * T0.W (MASKED), T1.W, +; CM-NEXT: MULADD_IEEE T2.X, T2.W, literal.x, T0.Z, +; CM-NEXT: ADD T0.Y, T0.W, -T2.Y, BS:VEC_120/SCL_212 +; CM-NEXT: FLT_TO_INT T0.Z, T1.X, +; CM-NEXT: MUL_IEEE * T0.W, PV.X, literal.y, +; CM-NEXT: 967029397(3.122284e-04), 209715200(1.972152e-31) +; CM-NEXT: MUL_IEEE T1.X, PV.W, literal.x, +; CM-NEXT: MUL_IEEE T1.Y, T0.X, literal.y, +; CM-NEXT: MAX_INT T1.Z, PV.Z, literal.z, +; CM-NEXT: MIN_INT * T1.W, PV.Z, literal.w, +; CM-NEXT: 209715200(1.972152e-31), 2130706432(1.701412e+38) +; CM-NEXT: -330(nan), 381(5.338947e-43) +; CM-NEXT: ADD_INT T3.X, PV.W, literal.x, +; CM-NEXT: ADD_INT T3.Y, PV.Z, literal.y, +; CM-NEXT: ADD_INT T1.Z, T0.Z, literal.z, +; CM-NEXT: SETGT_UINT * T1.W, T0.Z, literal.w, +; CM-NEXT: -254(nan), 204(2.858649e-43) +; CM-NEXT: 102(1.429324e-43), -229(nan) +; CM-NEXT: ADD_INT T4.X, T0.Z, literal.x, +; CM-NEXT: SETGT_UINT T4.Y, T0.Z, literal.y, +; CM-NEXT: CNDE_INT T1.Z, PV.W, PV.Y, PV.Z, +; CM-NEXT: SETGT_INT * T2.W, T0.Z, literal.x, +; CM-NEXT: -127(nan), 254(3.559298e-43) +; CM-NEXT: CNDE_INT T5.X, PV.W, PV.Z, T0.Z, +; CM-NEXT: CNDE_INT T3.Y, PV.Y, PV.X, T3.X, +; CM-NEXT: SETGT_INT T0.Z, T0.Z, literal.x, +; CM-NEXT: MUL_IEEE * T3.W, T1.Y, literal.y, +; CM-NEXT: 
127(1.779649e-43), 2130706432(1.701412e+38) +; CM-NEXT: CNDE_INT T3.X, T4.Y, T1.Y, PV.W, +; CM-NEXT: AND_INT T1.Y, KC0[3].Z, literal.x, +; CM-NEXT: CNDE_INT T1.Z, PV.Z, PV.X, PV.Y, +; CM-NEXT: CNDE_INT * T0.W, T1.W, T1.X, T0.W, +; CM-NEXT: -4096(nan), 0(0.000000e+00) +; CM-NEXT: CNDE_INT T0.X, T2.W, PV.W, T0.X, +; CM-NEXT: LSHL T3.Y, PV.Z, literal.x, +; CM-NEXT: TRUNC T1.Z, T2.Y, +; CM-NEXT: ADD * T0.W, KC0[3].Z, -PV.Y, +; CM-NEXT: 23(3.222986e-44), 0(0.000000e+00) +; CM-NEXT: MUL_IEEE T1.X, PV.W, literal.x, +; CM-NEXT: FLT_TO_INT T2.Y, PV.Z, +; CM-NEXT: ADD_INT T1.Z, PV.Y, literal.y, +; CM-NEXT: CNDE_INT * T1.W, T0.Z, PV.X, T3.X, +; CM-NEXT: 967029397(3.122284e-04), 1065353216(1.000000e+00) +; CM-NEXT: MUL_IEEE T0.X, PV.W, PV.Z, +; CM-NEXT: MIN_INT T3.Y, PV.Y, literal.x, +; CM-NEXT: MULADD_IEEE T0.Z, T0.W, literal.y, PV.X, +; CM-NEXT: ADD * T0.W, T0.Y, T2.X, +; CM-NEXT: 381(5.338947e-43), 1069064192(1.442383e+00) ; CM-NEXT: EXP_IEEE T0.X (MASKED), T0.W, -; CM-NEXT: EXP_IEEE T0.Y (MASKED), T0.W, -; CM-NEXT: EXP_IEEE T0.Z (MASKED), T0.W, -; CM-NEXT: EXP_IEEE * T0.W, T0.W, -; CM-NEXT: CNDE T1.X, T2.W, 1.0, literal.x, -; CM-NEXT: SETGT T3.Y, literal.y, T1.Z, -; CM-NEXT: ADD T2.Z, T0.Y, T2.Y, -; CM-NEXT: MUL_IEEE * T2.W, PV.W, T0.X, -; CM-NEXT: 528482304(5.421011e-20), -1023672320(-1.260000e+02) -; CM-NEXT: EXP_IEEE T0.X, T2.Z, -; CM-NEXT: EXP_IEEE T0.Y (MASKED), T2.Z, -; CM-NEXT: EXP_IEEE T0.Z (MASKED), T2.Z, -; CM-NEXT: EXP_IEEE * T0.W (MASKED), T2.Z, -; CM-NEXT: CNDE T2.X, T3.Y, 0.0, literal.x, -; CM-NEXT: CNDE T0.Y, T1.W, 1.0, literal.y, -; CM-NEXT: MUL_IEEE T2.Z, PV.X, T1.X, -; CM-NEXT: ADD * T0.W, T0.Z, T1.Y, -; CM-NEXT: 1115684864(6.400000e+01), 528482304(5.421011e-20) -; CM-NEXT: EXP_IEEE T0.X, T0.W, -; CM-NEXT: EXP_IEEE T0.Y (MASKED), T0.W, +; CM-NEXT: EXP_IEEE T0.Y, T0.W, ; CM-NEXT: EXP_IEEE T0.Z (MASKED), T0.W, ; CM-NEXT: EXP_IEEE * T0.W (MASKED), T0.W, -; CM-NEXT: MUL_IEEE T2.Y, PV.X, T0.Y, -; CM-NEXT: CNDE T0.Z, T3.Y, 1.0, literal.x, -; CM-NEXT: ADD * 
T0.W, T1.Z, T2.X, -; CM-NEXT: 528482304(5.421011e-20), 0(0.000000e+00) -; CM-NEXT: EXP_IEEE T0.X, T0.W, +; CM-NEXT: MULADD_IEEE T1.X, T1.Y, literal.x, T0.Z, +; CM-NEXT: MUL_IEEE T4.Y, PV.Y, literal.y, +; CM-NEXT: ADD_INT T0.Z, T3.Y, literal.z, BS:VEC_120/SCL_212 +; CM-NEXT: MAX_INT * T0.W, T2.Y, literal.w, BS:VEC_201 +; CM-NEXT: 967029397(3.122284e-04), 2130706432(1.701412e+38) +; CM-NEXT: -254(nan), -330(nan) +; CM-NEXT: ADD_INT T2.X, T2.Y, literal.x, +; CM-NEXT: ADD_INT T3.Y, PV.W, literal.y, +; CM-NEXT: ADD_INT T1.Z, T2.Y, literal.z, +; CM-NEXT: SETGT_UINT * T0.W, T2.Y, literal.w, +; CM-NEXT: -127(nan), 204(2.858649e-43) +; CM-NEXT: 102(1.429324e-43), -229(nan) +; CM-NEXT: SETGT_UINT T3.X, T2.Y, literal.x, +; CM-NEXT: CNDE_INT T3.Y, PV.W, PV.Y, PV.Z, +; CM-NEXT: SETGT_INT T1.Z, T2.Y, literal.y, +; CM-NEXT: MUL_IEEE * T1.W, T0.Y, literal.z, BS:VEC_120/SCL_212 +; CM-NEXT: 254(3.559298e-43), -127(nan) +; CM-NEXT: 209715200(1.972152e-31), 0(0.000000e+00) +; CM-NEXT: MUL_IEEE T4.X, PV.W, literal.x, +; CM-NEXT: CNDE_INT * T3.Y, PV.Z, PV.Y, T2.Y, +; CM-NEXT: 209715200(1.972152e-31), 0(0.000000e+00) +; CM-NEXT: ALU clause starting at 104: +; CM-NEXT: CNDE_INT T0.Z, T3.X, T2.X, T0.Z, +; CM-NEXT: SETGT_INT * T2.W, T2.Y, literal.x, +; CM-NEXT: 127(1.779649e-43), 0(0.000000e+00) +; CM-NEXT: MUL_IEEE T2.X, T1.Y, literal.x, +; CM-NEXT: CNDE_INT T1.Y, PV.W, T3.Y, PV.Z, +; CM-NEXT: CNDE_INT T0.Z, T0.W, T4.X, T1.W, +; CM-NEXT: MUL_IEEE * T0.W, T4.Y, literal.y, BS:VEC_201 +; CM-NEXT: 1069064192(1.442383e+00), 2130706432(1.701412e+38) +; CM-NEXT: AND_INT T4.X, KC0[4].X, literal.x, +; CM-NEXT: CNDE_INT T2.Y, T3.X, T4.Y, PV.W, +; CM-NEXT: CNDE_INT T0.Z, T1.Z, PV.Z, T0.Y, +; CM-NEXT: LSHL * T0.W, PV.Y, literal.y, +; CM-NEXT: -4096(nan), 23(3.222986e-44) +; CM-NEXT: ADD_INT T3.X, PV.W, literal.x, +; CM-NEXT: CNDE_INT T0.Y, T2.W, PV.Z, PV.Y, +; CM-NEXT: MUL_IEEE T0.Z, PV.X, literal.y, +; CM-NEXT: RNDNE * T0.W, T2.X, +; CM-NEXT: 1065353216(1.000000e+00), 1069064192(1.442383e+00) +; 
CM-NEXT: ADD T2.X, T2.X, -PV.W, +; CM-NEXT: RNDNE T1.Y, PV.Z, +; CM-NEXT: MUL_IEEE T1.Z, PV.Y, PV.X, +; CM-NEXT: SETGT * T1.W, literal.x, KC0[3].W, +; CM-NEXT: -1026650416(-1.032789e+02), 0(0.000000e+00) +; CM-NEXT: CNDE T3.X, PV.W, PV.Z, 0.0, +; CM-NEXT: TRUNC T0.Y, T0.W, +; CM-NEXT: TRUNC T1.Z, PV.Y, +; CM-NEXT: ADD * T0.W, PV.X, T1.X, +; CM-NEXT: EXP_IEEE T0.X (MASKED), T0.W, ; CM-NEXT: EXP_IEEE T0.Y (MASKED), T0.W, ; CM-NEXT: EXP_IEEE T0.Z (MASKED), T0.W, -; CM-NEXT: EXP_IEEE * T0.W (MASKED), T0.W, -; CM-NEXT: MUL_IEEE * T2.X, PV.X, T0.Z, -; CM-NEXT: LSHR * T0.X, KC0[2].Y, literal.x, +; CM-NEXT: EXP_IEEE * T0.W, T0.W, +; CM-NEXT: FLT_TO_INT T1.X, T1.Z, +; CM-NEXT: FLT_TO_INT T0.Y, T0.Y, +; CM-NEXT: MUL_IEEE T1.Z, PV.W, literal.x, +; CM-NEXT: ADD * T1.W, KC0[4].X, -T4.X, +; CM-NEXT: 2130706432(1.701412e+38), 0(0.000000e+00) +; CM-NEXT: MUL_IEEE T2.X, PV.W, literal.x, +; CM-NEXT: MUL_IEEE T2.Y, T0.W, literal.y, +; CM-NEXT: MUL_IEEE T2.Z, PV.Z, literal.z, +; CM-NEXT: SETGT_UINT * T2.W, PV.Y, literal.w, +; CM-NEXT: 967029397(3.122284e-04), 209715200(1.972152e-31) +; CM-NEXT: 2130706432(1.701412e+38), 254(3.559298e-43) +; CM-NEXT: CNDE_INT T5.X, PV.W, T1.Z, PV.Z, +; CM-NEXT: MUL_IEEE T3.Y, PV.Y, literal.x, +; CM-NEXT: MULADD_IEEE T1.Z, T1.W, literal.y, PV.X, +; CM-NEXT: MAX_INT * T1.W, T1.X, literal.z, +; CM-NEXT: 209715200(1.972152e-31), 1069064192(1.442383e+00) +; CM-NEXT: -330(nan), 0(0.000000e+00) +; CM-NEXT: ADD_INT T2.X, PV.W, literal.x, +; CM-NEXT: ADD_INT T4.Y, T1.X, literal.y, +; CM-NEXT: MULADD_IEEE T1.Z, T4.X, literal.z, PV.Z, BS:VEC_120/SCL_212 +; CM-NEXT: MAX_INT * T1.W, T0.Y, literal.w, +; CM-NEXT: 204(2.858649e-43), 102(1.429324e-43) +; CM-NEXT: 967029397(3.122284e-04), -330(nan) +; CM-NEXT: ADD T4.X, T0.Z, -T1.Y, +; CM-NEXT: ADD_INT T1.Y, PV.W, literal.x, +; CM-NEXT: ADD_INT T0.Z, T0.Y, literal.y, +; CM-NEXT: SETGT_UINT * T1.W, T0.Y, literal.z, +; CM-NEXT: 204(2.858649e-43), 102(1.429324e-43) +; CM-NEXT: -229(nan), 0(0.000000e+00) +; CM-NEXT: 
SETGT_UINT T6.X, T1.X, literal.x, +; CM-NEXT: CNDE_INT T1.Y, PV.W, PV.Y, PV.Z, +; CM-NEXT: SETGT_INT T0.Z, T0.Y, literal.y, +; CM-NEXT: ADD * T3.W, PV.X, T1.Z, +; CM-NEXT: -229(nan), -127(nan) +; CM-NEXT: EXP_IEEE T1.X (MASKED), T3.W, +; CM-NEXT: EXP_IEEE T1.Y (MASKED), T3.W, +; CM-NEXT: EXP_IEEE T1.Z, T3.W, +; CM-NEXT: EXP_IEEE * T1.W (MASKED), T3.W, +; CM-NEXT: CNDE_INT T4.X, T0.Z, T1.Y, T0.Y, +; CM-NEXT: CNDE_INT T1.Y, T6.X, T2.X, T4.Y, BS:VEC_120/SCL_212 +; CM-NEXT: SETGT_INT T2.Z, T1.X, literal.x, +; CM-NEXT: MUL_IEEE * T3.W, PV.Z, literal.y, +; CM-NEXT: -127(nan), 209715200(1.972152e-31) +; CM-NEXT: MUL_IEEE T2.X, T1.Z, literal.x, +; CM-NEXT: MUL_IEEE T4.Y, PV.W, literal.y, +; CM-NEXT: CNDE_INT T3.Z, PV.Z, PV.Y, T1.X, +; CM-NEXT: MIN_INT * T4.W, T1.X, literal.z, +; CM-NEXT: 2130706432(1.701412e+38), 209715200(1.972152e-31) +; CM-NEXT: 381(5.338947e-43), 0(0.000000e+00) +; CM-NEXT: MIN_INT T7.X, T0.Y, literal.x, +; CM-NEXT: ADD_INT T1.Y, PV.W, literal.y, +; CM-NEXT: ADD_INT T4.Z, T1.X, literal.z, +; CM-NEXT: SETGT_UINT * T4.W, T1.X, literal.w, +; CM-NEXT: 381(5.338947e-43), -254(nan) +; CM-NEXT: -127(nan), 254(3.559298e-43) +; CM-NEXT: CNDE_INT T8.X, PV.W, PV.Z, PV.Y, +; CM-NEXT: SETGT_INT T1.Y, T1.X, literal.x, +; CM-NEXT: ADD_INT T4.Z, PV.X, literal.y, +; CM-NEXT: ADD_INT * T5.W, T0.Y, literal.z, +; CM-NEXT: 127(1.779649e-43), -254(nan) +; CM-NEXT: -127(nan), 0(0.000000e+00) +; CM-NEXT: CNDE_INT T1.X, T2.W, PV.W, PV.Z, +; CM-NEXT: CNDE_INT T5.Y, PV.Y, T3.Z, PV.X, +; CM-NEXT: CNDE_INT T3.Z, T6.X, T4.Y, T3.W, +; CM-NEXT: MUL_IEEE * T2.W, T2.X, literal.x, BS:VEC_120/SCL_212 +; CM-NEXT: 2130706432(1.701412e+38), 0(0.000000e+00) +; CM-NEXT: SETGT_INT T6.X, T0.Y, literal.x, +; CM-NEXT: CNDE_INT T0.Y, T4.W, T2.X, PV.W, +; CM-NEXT: CNDE_INT * T1.Z, T2.Z, PV.Z, T1.Z, +; CM-NEXT: 127(1.779649e-43), 0(0.000000e+00) +; CM-NEXT: ALU clause starting at 205: +; CM-NEXT: LSHL * T2.W, T5.Y, literal.x, +; CM-NEXT: 23(3.222986e-44), 0(0.000000e+00) +; CM-NEXT: ADD_INT T2.X, 
PV.W, literal.x, +; CM-NEXT: CNDE_INT T0.Y, T1.Y, T1.Z, T0.Y, +; CM-NEXT: CNDE_INT * T1.Z, T6.X, T4.X, T1.X, +; CM-NEXT: 1065353216(1.000000e+00), 0(0.000000e+00) +; CM-NEXT: CNDE_INT * T1.W, T1.W, T3.Y, T2.Y, +; CM-NEXT: CNDE_INT T1.X, T0.Z, PV.W, T0.W, +; CM-NEXT: LSHL T1.Y, T1.Z, literal.x, BS:VEC_120/SCL_212 +; CM-NEXT: MUL_IEEE T0.Z, T0.Y, T2.X, +; CM-NEXT: SETGT * T0.W, literal.y, KC0[4].X, +; CM-NEXT: 23(3.222986e-44), -1026650416(-1.032789e+02) +; CM-NEXT: CNDE T2.X, PV.W, PV.Z, 0.0, +; CM-NEXT: SETGT T0.Y, KC0[4].X, literal.x, +; CM-NEXT: ADD_INT T0.Z, PV.Y, literal.y, +; CM-NEXT: CNDE_INT * T0.W, T6.X, PV.X, T5.X, +; CM-NEXT: 1118925336(8.872284e+01), 1065353216(1.000000e+00) +; CM-NEXT: SETGT T1.X, KC0[3].W, literal.x, +; CM-NEXT: MUL_IEEE T1.Y, PV.W, PV.Z, +; CM-NEXT: SETGT T0.Z, literal.y, KC0[3].Z, +; CM-NEXT: CNDE * T0.W, PV.Y, PV.X, literal.z, +; CM-NEXT: 1118925336(8.872284e+01), -1026650416(-1.032789e+02) +; CM-NEXT: 2139095040(INF), 0(0.000000e+00) +; CM-NEXT: SETGT T2.X, literal.x, KC0[3].Y, +; CM-NEXT: CNDE T0.Y, PV.Z, PV.Y, 0.0, +; CM-NEXT: CNDE T0.Z, PV.X, T3.X, literal.y, +; CM-NEXT: SETGT * T1.W, KC0[3].Z, literal.z, +; CM-NEXT: -1026650416(-1.032789e+02), 2139095040(INF) +; CM-NEXT: 1118925336(8.872284e+01), 0(0.000000e+00) +; CM-NEXT: CNDE T0.Y, PV.W, PV.Y, literal.x, +; CM-NEXT: CNDE T1.Z, PV.X, T0.X, 0.0, +; CM-NEXT: SETGT * T1.W, KC0[3].Y, literal.y, +; CM-NEXT: 2139095040(INF), 1118925336(8.872284e+01) +; CM-NEXT: CNDE * T0.X, PV.W, PV.Z, literal.x, +; CM-NEXT: 2139095040(INF), 0(0.000000e+00) +; CM-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, ; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00) %result = call <4 x float> @llvm.exp.v4f32(<4 x float> %in) store <4 x float> %result, ptr addrspace(1) %out @@ -1108,64 +2570,148 @@ } define float @v_exp_f32(float %in) { -; GCN-SDAG-LABEL: v_exp_f32: -; GCN-SDAG: ; %bb.0: -; GCN-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 -; GCN-SDAG-NEXT: 
s_mov_b32 s4, 0xc2fc0000 -; GCN-SDAG-NEXT: v_mov_b32_e32 v1, 0x42800000 -; GCN-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 -; GCN-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc -; GCN-SDAG-NEXT: v_add_f32_e32 v0, v0, v1 -; GCN-SDAG-NEXT: v_exp_f32_e32 v0, v0 -; GCN-SDAG-NEXT: v_mov_b32_e32 v1, 0x1f800000 -; GCN-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; GCN-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 -; GCN-SDAG-NEXT: s_setpc_b64 s[30:31] +; VI-SDAG-LABEL: v_exp_f32: +; VI-SDAG: ; %bb.0: +; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 +; VI-SDAG-NEXT: v_sub_f32_e32 v4, v0, v1 +; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v1 +; VI-SDAG-NEXT: v_mul_f32_e32 v5, 0x39a3b295, v4 +; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3fb8a000, v4 +; VI-SDAG-NEXT: v_rndne_f32_e32 v3, v2 +; VI-SDAG-NEXT: v_add_f32_e32 v4, v4, v5 +; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1 +; VI-SDAG-NEXT: v_sub_f32_e32 v2, v2, v3 +; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v4 +; VI-SDAG-NEXT: v_add_f32_e32 v1, v2, v1 +; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1 +; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v3 +; VI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0 +; VI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 +; VI-SDAG-NEXT: s_mov_b32 s4, 0x42b17218 +; VI-SDAG-NEXT: v_ldexp_f32 v1, v1, v2 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000 +; VI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc +; VI-SDAG-NEXT: s_setpc_b64 s[30:31] ; -; GCN-GISEL-LABEL: v_exp_f32: -; GCN-GISEL: ; %bb.0: -; GCN-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 -; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000 -; GCN-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000 -; GCN-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 -; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc -; GCN-GISEL-NEXT: v_add_f32_e32 v0, v0, v1 -; GCN-GISEL-NEXT: v_exp_f32_e32 v0, v0 -; GCN-GISEL-NEXT: 
v_mov_b32_e32 v1, 0x1f800000 -; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; GCN-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 -; GCN-GISEL-NEXT: s_setpc_b64 s[30:31] +; VI-GISEL-LABEL: v_exp_f32: +; VI-GISEL: ; %bb.0: +; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 +; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1 +; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x39a3b295, v2 +; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v2 +; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3fb8a000, v1 +; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v4 +; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1 +; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 +; VI-GISEL-NEXT: v_rndne_f32_e32 v2, v3 +; VI-GISEL-NEXT: v_sub_f32_e32 v3, v3, v2 +; VI-GISEL-NEXT: v_add_f32_e32 v1, v3, v1 +; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v2 +; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1 +; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 +; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 +; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 +; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 +; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218 +; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc +; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc +; VI-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-SDAG-LABEL: v_exp_f32: +; GFX900-SDAG: ; %bb.0: +; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b +; GFX900-SDAG-NEXT: v_rndne_f32_e32 v2, v1 +; GFX900-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2 +; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x32a5705f +; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1 +; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v3, v1 +; GFX900-SDAG-NEXT: v_exp_f32_e32 v1, v1 +; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0 +; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 +; GFX900-SDAG-NEXT: 
s_mov_b32 s4, 0x42b17218 +; GFX900-SDAG-NEXT: v_ldexp_f32 v1, v1, v2 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000 +; GFX900-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc +; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-GISEL-LABEL: v_exp_f32: +; GFX900-GISEL: ; %bb.0: +; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-GISEL-NEXT: s_mov_b32 s4, 0x3fb8aa3b +; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0 +; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f +; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2 +; GFX900-GISEL-NEXT: v_rndne_f32_e32 v3, v1 +; GFX900-GISEL-NEXT: v_sub_f32_e32 v1, v1, v3 +; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 +; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3 +; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 +; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 +; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218 +; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc +; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc +; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; SI-SDAG-LABEL: v_exp_f32: ; SI-SDAG: ; %bb.0: ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 -; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2fc0000 -; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x42800000 -; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 +; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0 +; SI-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b +; SI-SDAG-NEXT: v_rndne_f32_e32 v2, v1 +; SI-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2 +; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1 +; SI-SDAG-NEXT: s_mov_b32 s4, 0x32a5705f +; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1 +; SI-SDAG-NEXT: v_add_f32_e32 
v1, v3, v1 +; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1 +; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2 +; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0 +; SI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 +; SI-SDAG-NEXT: s_mov_b32 s4, 0x42b17218 +; SI-SDAG-NEXT: v_ldexp_f32_e32 v1, v1, v2 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc -; SI-SDAG-NEXT: v_add_f32_e32 v0, v0, v1 -; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0 -; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x1f800000 -; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; SI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 +; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000 +; SI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc ; SI-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; SI-GISEL-LABEL: v_exp_f32: ; SI-GISEL: ; %bb.0: ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 -; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000 -; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000 -; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 -; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc -; SI-GISEL-NEXT: v_add_f32_e32 v0, v0, v1 -; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0 -; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x1f800000 -; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; SI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 +; SI-GISEL-NEXT: s_mov_b32 s4, 0x3fb8aa3b +; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0 +; SI-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1 +; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f +; SI-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2 +; SI-GISEL-NEXT: v_rndne_f32_e32 v3, v1 +; SI-GISEL-NEXT: v_sub_f32_e32 v1, v1, v3 +; SI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 +; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3 +; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1 +; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 +; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2 +; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 +; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 +; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218 +; SI-GISEL-NEXT: v_cndmask_b32_e64 
v1, v1, 0, vcc +; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc ; SI-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_exp_f32: @@ -1182,68 +2728,150 @@ } define float @v_exp_fabs_f32(float %in) { -; GCN-SDAG-LABEL: v_exp_fabs_f32: -; GCN-SDAG: ; %bb.0: -; GCN-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b -; GCN-SDAG-NEXT: v_mul_f32_e64 v0, |v0|, s4 -; GCN-SDAG-NEXT: s_mov_b32 s4, 0xc2fc0000 -; GCN-SDAG-NEXT: v_mov_b32_e32 v1, 0x42800000 -; GCN-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 -; GCN-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc -; GCN-SDAG-NEXT: v_add_f32_e32 v0, v0, v1 -; GCN-SDAG-NEXT: v_exp_f32_e32 v0, v0 -; GCN-SDAG-NEXT: v_mov_b32_e32 v1, 0x1f800000 -; GCN-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; GCN-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 -; GCN-SDAG-NEXT: s_setpc_b64 s[30:31] +; VI-SDAG-LABEL: v_exp_fabs_f32: +; VI-SDAG: ; %bb.0: +; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-SDAG-NEXT: v_and_b32_e32 v1, 0x7fffffff, v0 +; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v1 +; VI-SDAG-NEXT: v_sub_f32_e64 v4, |v0|, v1 +; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v1 +; VI-SDAG-NEXT: v_mul_f32_e32 v5, 0x39a3b295, v4 +; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3fb8a000, v4 +; VI-SDAG-NEXT: v_rndne_f32_e32 v3, v2 +; VI-SDAG-NEXT: v_add_f32_e32 v4, v4, v5 +; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1 +; VI-SDAG-NEXT: v_sub_f32_e32 v2, v2, v3 +; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v4 +; VI-SDAG-NEXT: v_add_f32_e32 v1, v2, v1 +; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1 +; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v3 +; VI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0 +; VI-SDAG-NEXT: v_cmp_nlt_f32_e64 vcc, |v0|, s4 +; VI-SDAG-NEXT: s_mov_b32 s4, 0x42b17218 +; VI-SDAG-NEXT: v_ldexp_f32 v1, v1, v2 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000 +; VI-SDAG-NEXT: v_cmp_ngt_f32_e64 vcc, |v0|, s4 +; VI-SDAG-NEXT: v_cndmask_b32_e32 
v0, v2, v1, vcc +; VI-SDAG-NEXT: s_setpc_b64 s[30:31] ; -; GCN-GISEL-LABEL: v_exp_fabs_f32: -; GCN-GISEL: ; %bb.0: -; GCN-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0x3fb8aa3b -; GCN-GISEL-NEXT: v_mul_f32_e64 v0, |v0|, v1 -; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000 -; GCN-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000 -; GCN-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 -; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc -; GCN-GISEL-NEXT: v_add_f32_e32 v0, v0, v1 -; GCN-GISEL-NEXT: v_exp_f32_e32 v0, v0 -; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0x1f800000 -; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; GCN-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 -; GCN-GISEL-NEXT: s_setpc_b64 s[30:31] +; VI-GISEL-LABEL: v_exp_fabs_f32: +; VI-GISEL: ; %bb.0: +; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-GISEL-NEXT: v_and_b32_e32 v1, 0x7fffffff, v0 +; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v1 +; VI-GISEL-NEXT: v_sub_f32_e64 v2, |v0|, v1 +; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x39a3b295, v2 +; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v2 +; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3fb8a000, v1 +; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v4 +; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1 +; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 +; VI-GISEL-NEXT: v_rndne_f32_e32 v2, v3 +; VI-GISEL-NEXT: v_sub_f32_e32 v3, v3, v2 +; VI-GISEL-NEXT: v_add_f32_e32 v1, v3, v1 +; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v2 +; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1 +; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 +; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 +; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 +; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2 +; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218 +; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, s[4:5] +; VI-GISEL-NEXT: v_cmp_gt_f32_e64 vcc, |v0|, v2 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc +; VI-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-SDAG-LABEL: v_exp_fabs_f32: +; GFX900-SDAG: ; %bb.0: +; 
GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b +; GFX900-SDAG-NEXT: v_mul_f32_e64 v1, |v0|, s4 +; GFX900-SDAG-NEXT: v_rndne_f32_e32 v2, v1 +; GFX900-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2 +; GFX900-SDAG-NEXT: v_fma_f32 v1, |v0|, s4, -v1 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x32a5705f +; GFX900-SDAG-NEXT: v_fma_f32 v1, |v0|, s4, v1 +; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v3, v1 +; GFX900-SDAG-NEXT: v_exp_f32_e32 v1, v1 +; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0 +; GFX900-SDAG-NEXT: v_cmp_nlt_f32_e64 vcc, |v0|, s4 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x42b17218 +; GFX900-SDAG-NEXT: v_ldexp_f32 v1, v1, v2 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000 +; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e64 vcc, |v0|, s4 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc +; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-GISEL-LABEL: v_exp_fabs_f32: +; GFX900-GISEL: ; %bb.0: +; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-GISEL-NEXT: s_mov_b32 s4, 0x3fb8aa3b +; GFX900-GISEL-NEXT: v_mul_f32_e64 v1, |v0|, s4 +; GFX900-GISEL-NEXT: v_fma_f32 v2, |v0|, s4, -v1 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f +; GFX900-GISEL-NEXT: v_fma_f32 v2, |v0|, v3, v2 +; GFX900-GISEL-NEXT: v_rndne_f32_e32 v3, v1 +; GFX900-GISEL-NEXT: v_sub_f32_e32 v1, v1, v3 +; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 +; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3 +; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 +; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 +; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218 +; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, s[4:5] +; GFX900-GISEL-NEXT: v_cmp_gt_f32_e64 vcc, |v0|, v2 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc +; 
GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; SI-SDAG-LABEL: v_exp_fabs_f32: ; SI-SDAG: ; %bb.0: ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; SI-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b -; SI-SDAG-NEXT: v_mul_f32_e64 v0, |v0|, s4 -; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2fc0000 -; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x42800000 -; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 +; SI-SDAG-NEXT: v_mul_f32_e64 v1, |v0|, s4 +; SI-SDAG-NEXT: v_rndne_f32_e32 v2, v1 +; SI-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2 +; SI-SDAG-NEXT: v_fma_f32 v1, |v0|, s4, -v1 +; SI-SDAG-NEXT: s_mov_b32 s4, 0x32a5705f +; SI-SDAG-NEXT: v_fma_f32 v1, |v0|, s4, v1 +; SI-SDAG-NEXT: v_add_f32_e32 v1, v3, v1 +; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1 +; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2 +; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0 +; SI-SDAG-NEXT: v_cmp_nlt_f32_e64 vcc, |v0|, s4 +; SI-SDAG-NEXT: s_mov_b32 s4, 0x42b17218 +; SI-SDAG-NEXT: v_ldexp_f32_e32 v1, v1, v2 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc -; SI-SDAG-NEXT: v_add_f32_e32 v0, v0, v1 -; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0 -; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x1f800000 -; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; SI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 +; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000 +; SI-SDAG-NEXT: v_cmp_ngt_f32_e64 vcc, |v0|, s4 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc ; SI-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; SI-GISEL-LABEL: v_exp_fabs_f32: ; SI-GISEL: ; %bb.0: ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x3fb8aa3b -; SI-GISEL-NEXT: v_mul_f32_e64 v0, |v0|, v1 -; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000 -; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000 -; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 -; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc -; SI-GISEL-NEXT: v_add_f32_e32 v0, v0, v1 -; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0 -; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x1f800000 -; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; SI-GISEL-NEXT: v_mul_f32_e32 v0, v0, 
v1 +; SI-GISEL-NEXT: s_mov_b32 s4, 0x3fb8aa3b +; SI-GISEL-NEXT: v_mul_f32_e64 v1, |v0|, s4 +; SI-GISEL-NEXT: v_fma_f32 v2, |v0|, s4, -v1 +; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f +; SI-GISEL-NEXT: v_fma_f32 v2, |v0|, v3, v2 +; SI-GISEL-NEXT: v_rndne_f32_e32 v3, v1 +; SI-GISEL-NEXT: v_sub_f32_e32 v1, v1, v3 +; SI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 +; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3 +; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1 +; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 +; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2 +; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 +; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2 +; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218 +; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, s[4:5] +; SI-GISEL-NEXT: v_cmp_gt_f32_e64 vcc, |v0|, v2 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc ; SI-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_exp_fabs_f32: @@ -1261,68 +2889,150 @@ } define float @v_exp_fneg_fabs_f32(float %in) { -; GCN-SDAG-LABEL: v_exp_fneg_fabs_f32: -; GCN-SDAG: ; %bb.0: -; GCN-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-SDAG-NEXT: s_mov_b32 s4, 0xbfb8aa3b -; GCN-SDAG-NEXT: v_mul_f32_e64 v0, |v0|, s4 -; GCN-SDAG-NEXT: s_mov_b32 s4, 0xc2fc0000 -; GCN-SDAG-NEXT: v_mov_b32_e32 v1, 0x42800000 -; GCN-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 -; GCN-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc -; GCN-SDAG-NEXT: v_add_f32_e32 v0, v0, v1 -; GCN-SDAG-NEXT: v_exp_f32_e32 v0, v0 -; GCN-SDAG-NEXT: v_mov_b32_e32 v1, 0x1f800000 -; GCN-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; GCN-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 -; GCN-SDAG-NEXT: s_setpc_b64 s[30:31] +; VI-SDAG-LABEL: v_exp_fneg_fabs_f32: +; VI-SDAG: ; %bb.0: +; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-SDAG-NEXT: v_or_b32_e32 v1, 0x80000000, v0 +; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v1 +; VI-SDAG-NEXT: v_sub_f32_e64 v4, -|v0|, v1 +; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v1 +; VI-SDAG-NEXT: v_mul_f32_e32 v5, 0x39a3b295, v4 +; 
VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3fb8a000, v4 +; VI-SDAG-NEXT: v_rndne_f32_e32 v3, v2 +; VI-SDAG-NEXT: v_add_f32_e32 v4, v4, v5 +; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1 +; VI-SDAG-NEXT: v_sub_f32_e32 v2, v2, v3 +; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v4 +; VI-SDAG-NEXT: v_add_f32_e32 v1, v2, v1 +; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1 +; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v3 +; VI-SDAG-NEXT: s_mov_b32 s4, 0x42ce8ed0 +; VI-SDAG-NEXT: v_cmp_ngt_f32_e64 vcc, |v0|, s4 +; VI-SDAG-NEXT: s_mov_b32 s4, 0xc2b17218 +; VI-SDAG-NEXT: v_ldexp_f32 v1, v1, v2 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000 +; VI-SDAG-NEXT: v_cmp_nlt_f32_e64 vcc, |v0|, s4 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc +; VI-SDAG-NEXT: s_setpc_b64 s[30:31] ; -; GCN-GISEL-LABEL: v_exp_fneg_fabs_f32: -; GCN-GISEL: ; %bb.0: -; GCN-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0x3fb8aa3b -; GCN-GISEL-NEXT: v_mul_f32_e64 v0, -|v0|, v1 -; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000 -; GCN-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000 -; GCN-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 -; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc -; GCN-GISEL-NEXT: v_add_f32_e32 v0, v0, v1 -; GCN-GISEL-NEXT: v_exp_f32_e32 v0, v0 -; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0x1f800000 -; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; GCN-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 -; GCN-GISEL-NEXT: s_setpc_b64 s[30:31] +; VI-GISEL-LABEL: v_exp_fneg_fabs_f32: +; VI-GISEL: ; %bb.0: +; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-GISEL-NEXT: v_or_b32_e32 v1, 0x80000000, v0 +; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v1 +; VI-GISEL-NEXT: v_sub_f32_e64 v2, -|v0|, v1 +; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x39a3b295, v2 +; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v2 +; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3fb8a000, v1 +; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v4 +; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1 +; 
VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 +; VI-GISEL-NEXT: v_rndne_f32_e32 v2, v3 +; VI-GISEL-NEXT: v_sub_f32_e32 v3, v3, v2 +; VI-GISEL-NEXT: v_add_f32_e32 v1, v3, v1 +; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v2 +; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1 +; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 +; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 +; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 +; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], -|v0|, v2 +; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218 +; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, s[4:5] +; VI-GISEL-NEXT: v_cmp_gt_f32_e64 vcc, -|v0|, v2 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc +; VI-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-SDAG-LABEL: v_exp_fneg_fabs_f32: +; GFX900-SDAG: ; %bb.0: +; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0xbfb8aa3b +; GFX900-SDAG-NEXT: v_mul_f32_e64 v1, |v0|, s4 +; GFX900-SDAG-NEXT: v_rndne_f32_e32 v2, v1 +; GFX900-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2 +; GFX900-SDAG-NEXT: v_fma_f32 v1, |v0|, s4, -v1 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0xb2a5705f +; GFX900-SDAG-NEXT: v_fma_f32 v1, |v0|, s4, v1 +; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v3, v1 +; GFX900-SDAG-NEXT: v_exp_f32_e32 v1, v1 +; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x42ce8ed0 +; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e64 vcc, |v0|, s4 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0xc2b17218 +; GFX900-SDAG-NEXT: v_ldexp_f32 v1, v1, v2 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000 +; GFX900-SDAG-NEXT: v_cmp_nlt_f32_e64 vcc, |v0|, s4 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc +; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-GISEL-LABEL: v_exp_fneg_fabs_f32: +; GFX900-GISEL: ; %bb.0: +; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-GISEL-NEXT: s_mov_b32 s4, 0x3fb8aa3b +; GFX900-GISEL-NEXT: v_mul_f32_e64 v1, -|v0|, s4 +; GFX900-GISEL-NEXT: v_fma_f32 v2, -|v0|, s4, -v1 
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f +; GFX900-GISEL-NEXT: v_fma_f32 v2, -|v0|, v3, v2 +; GFX900-GISEL-NEXT: v_rndne_f32_e32 v3, v1 +; GFX900-GISEL-NEXT: v_sub_f32_e32 v1, v1, v3 +; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 +; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3 +; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 +; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 +; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], -|v0|, v2 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218 +; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, s[4:5] +; GFX900-GISEL-NEXT: v_cmp_gt_f32_e64 vcc, -|v0|, v2 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc +; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; SI-SDAG-LABEL: v_exp_fneg_fabs_f32: ; SI-SDAG: ; %bb.0: ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; SI-SDAG-NEXT: s_mov_b32 s4, 0xbfb8aa3b -; SI-SDAG-NEXT: v_mul_f32_e64 v0, |v0|, s4 -; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2fc0000 -; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x42800000 -; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 +; SI-SDAG-NEXT: v_mul_f32_e64 v1, |v0|, s4 +; SI-SDAG-NEXT: v_rndne_f32_e32 v2, v1 +; SI-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2 +; SI-SDAG-NEXT: v_fma_f32 v1, |v0|, s4, -v1 +; SI-SDAG-NEXT: s_mov_b32 s4, 0xb2a5705f +; SI-SDAG-NEXT: v_fma_f32 v1, |v0|, s4, v1 +; SI-SDAG-NEXT: v_add_f32_e32 v1, v3, v1 +; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1 +; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2 +; SI-SDAG-NEXT: s_mov_b32 s4, 0x42ce8ed0 +; SI-SDAG-NEXT: v_cmp_ngt_f32_e64 vcc, |v0|, s4 +; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2b17218 +; SI-SDAG-NEXT: v_ldexp_f32_e32 v1, v1, v2 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc -; SI-SDAG-NEXT: v_add_f32_e32 v0, v0, v1 -; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0 -; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x1f800000 -; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; SI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 +; SI-SDAG-NEXT: v_mov_b32_e32 v2, 
0x7f800000 +; SI-SDAG-NEXT: v_cmp_nlt_f32_e64 vcc, |v0|, s4 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc ; SI-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; SI-GISEL-LABEL: v_exp_fneg_fabs_f32: ; SI-GISEL: ; %bb.0: ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x3fb8aa3b -; SI-GISEL-NEXT: v_mul_f32_e64 v0, -|v0|, v1 -; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000 -; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000 -; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 -; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc -; SI-GISEL-NEXT: v_add_f32_e32 v0, v0, v1 -; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0 -; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x1f800000 -; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; SI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 +; SI-GISEL-NEXT: s_mov_b32 s4, 0x3fb8aa3b +; SI-GISEL-NEXT: v_mul_f32_e64 v1, -|v0|, s4 +; SI-GISEL-NEXT: v_fma_f32 v2, -|v0|, s4, -v1 +; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f +; SI-GISEL-NEXT: v_fma_f32 v2, -|v0|, v3, v2 +; SI-GISEL-NEXT: v_rndne_f32_e32 v3, v1 +; SI-GISEL-NEXT: v_sub_f32_e32 v1, v1, v3 +; SI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 +; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3 +; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1 +; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 +; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2 +; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 +; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], -|v0|, v2 +; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218 +; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, s[4:5] +; SI-GISEL-NEXT: v_cmp_gt_f32_e64 vcc, -|v0|, v2 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc ; SI-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_exp_fneg_fabs_f32: @@ -1341,66 +3051,150 @@ } define float @v_exp_fneg_f32(float %in) { -; GCN-SDAG-LABEL: v_exp_fneg_f32: -; GCN-SDAG: ; %bb.0: -; GCN-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-SDAG-NEXT: v_mul_f32_e32 v0, 0xbfb8aa3b, v0 -; GCN-SDAG-NEXT: s_mov_b32 s4, 0xc2fc0000 -; GCN-SDAG-NEXT: v_mov_b32_e32 v1, 
0x42800000 -; GCN-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 -; GCN-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc -; GCN-SDAG-NEXT: v_add_f32_e32 v0, v0, v1 -; GCN-SDAG-NEXT: v_exp_f32_e32 v0, v0 -; GCN-SDAG-NEXT: v_mov_b32_e32 v1, 0x1f800000 -; GCN-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; GCN-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 -; GCN-SDAG-NEXT: s_setpc_b64 s[30:31] +; VI-SDAG-LABEL: v_exp_fneg_f32: +; VI-SDAG: ; %bb.0: +; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-SDAG-NEXT: v_xor_b32_e32 v1, 0x80000000, v0 +; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v1 +; VI-SDAG-NEXT: v_sub_f32_e64 v4, -v0, v1 +; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v1 +; VI-SDAG-NEXT: v_mul_f32_e32 v5, 0x39a3b295, v4 +; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3fb8a000, v4 +; VI-SDAG-NEXT: v_rndne_f32_e32 v3, v2 +; VI-SDAG-NEXT: v_add_f32_e32 v4, v4, v5 +; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1 +; VI-SDAG-NEXT: v_sub_f32_e32 v2, v2, v3 +; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v4 +; VI-SDAG-NEXT: v_add_f32_e32 v1, v2, v1 +; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1 +; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v3 +; VI-SDAG-NEXT: s_mov_b32 s4, 0x42ce8ed0 +; VI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0 +; VI-SDAG-NEXT: s_mov_b32 s4, 0xc2b17218 +; VI-SDAG-NEXT: v_ldexp_f32 v1, v1, v2 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000 +; VI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc +; VI-SDAG-NEXT: s_setpc_b64 s[30:31] ; -; GCN-GISEL-LABEL: v_exp_fneg_f32: -; GCN-GISEL: ; %bb.0: -; GCN-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0x3fb8aa3b -; GCN-GISEL-NEXT: v_mul_f32_e64 v0, -v0, v1 -; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000 -; GCN-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000 -; GCN-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 -; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc -; GCN-GISEL-NEXT: v_add_f32_e32 v0, v0, v1 -; GCN-GISEL-NEXT: 
v_exp_f32_e32 v0, v0 -; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0x1f800000 -; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; GCN-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 -; GCN-GISEL-NEXT: s_setpc_b64 s[30:31] +; VI-GISEL-LABEL: v_exp_fneg_f32: +; VI-GISEL: ; %bb.0: +; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-GISEL-NEXT: v_xor_b32_e32 v1, 0x80000000, v0 +; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v1 +; VI-GISEL-NEXT: v_sub_f32_e64 v2, -v0, v1 +; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x39a3b295, v2 +; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v2 +; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3fb8a000, v1 +; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v4 +; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1 +; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 +; VI-GISEL-NEXT: v_rndne_f32_e32 v2, v3 +; VI-GISEL-NEXT: v_sub_f32_e32 v3, v3, v2 +; VI-GISEL-NEXT: v_add_f32_e32 v1, v3, v1 +; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v2 +; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1 +; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 +; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 +; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 +; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], -v0, v2 +; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218 +; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, s[4:5] +; VI-GISEL-NEXT: v_cmp_gt_f32_e64 vcc, -v0, v2 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc +; VI-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-SDAG-LABEL: v_exp_fneg_f32: +; GFX900-SDAG: ; %bb.0: +; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0xbfb8aa3b, v0 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0xbfb8aa3b +; GFX900-SDAG-NEXT: v_rndne_f32_e32 v2, v1 +; GFX900-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2 +; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0xb2a5705f +; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1 +; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v3, v1 +; GFX900-SDAG-NEXT: v_exp_f32_e32 v1, v1 +; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2 +; 
GFX900-SDAG-NEXT: s_mov_b32 s4, 0x42ce8ed0 +; GFX900-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0xc2b17218 +; GFX900-SDAG-NEXT: v_ldexp_f32 v1, v1, v2 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000 +; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc +; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-GISEL-LABEL: v_exp_fneg_f32: +; GFX900-GISEL: ; %bb.0: +; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-GISEL-NEXT: s_mov_b32 s4, 0x3fb8aa3b +; GFX900-GISEL-NEXT: v_mul_f32_e64 v1, -v0, s4 +; GFX900-GISEL-NEXT: v_fma_f32 v2, -v0, s4, -v1 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f +; GFX900-GISEL-NEXT: v_fma_f32 v2, -v0, v3, v2 +; GFX900-GISEL-NEXT: v_rndne_f32_e32 v3, v1 +; GFX900-GISEL-NEXT: v_sub_f32_e32 v1, v1, v3 +; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 +; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3 +; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 +; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 +; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], -v0, v2 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218 +; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, s[4:5] +; GFX900-GISEL-NEXT: v_cmp_gt_f32_e64 vcc, -v0, v2 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc +; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; SI-SDAG-LABEL: v_exp_fneg_f32: ; SI-SDAG: ; %bb.0: ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0xbfb8aa3b, v0 -; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2fc0000 -; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x42800000 -; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 +; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0xbfb8aa3b, v0 +; SI-SDAG-NEXT: s_mov_b32 s4, 0xbfb8aa3b +; SI-SDAG-NEXT: v_rndne_f32_e32 v2, v1 +; SI-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2 +; SI-SDAG-NEXT: v_fma_f32 v1, 
v0, s4, -v1 +; SI-SDAG-NEXT: s_mov_b32 s4, 0xb2a5705f +; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1 +; SI-SDAG-NEXT: v_add_f32_e32 v1, v3, v1 +; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1 +; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2 +; SI-SDAG-NEXT: s_mov_b32 s4, 0x42ce8ed0 +; SI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0 +; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2b17218 +; SI-SDAG-NEXT: v_ldexp_f32_e32 v1, v1, v2 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc -; SI-SDAG-NEXT: v_add_f32_e32 v0, v0, v1 -; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0 -; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x1f800000 -; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; SI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 +; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000 +; SI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc ; SI-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; SI-GISEL-LABEL: v_exp_fneg_f32: ; SI-GISEL: ; %bb.0: ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x3fb8aa3b -; SI-GISEL-NEXT: v_mul_f32_e64 v0, -v0, v1 -; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000 -; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000 -; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 -; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc -; SI-GISEL-NEXT: v_add_f32_e32 v0, v0, v1 -; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0 -; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x1f800000 -; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; SI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 +; SI-GISEL-NEXT: s_mov_b32 s4, 0x3fb8aa3b +; SI-GISEL-NEXT: v_mul_f32_e64 v1, -v0, s4 +; SI-GISEL-NEXT: v_fma_f32 v2, -v0, s4, -v1 +; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f +; SI-GISEL-NEXT: v_fma_f32 v2, -v0, v3, v2 +; SI-GISEL-NEXT: v_rndne_f32_e32 v3, v1 +; SI-GISEL-NEXT: v_sub_f32_e32 v1, v1, v3 +; SI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 +; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3 +; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1 +; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 +; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2 +; 
SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 +; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], -v0, v2 +; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218 +; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, s[4:5] +; SI-GISEL-NEXT: v_cmp_gt_f32_e64 vcc, -v0, v2 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc ; SI-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_exp_fneg_f32: @@ -1418,19 +3212,125 @@ } define float @v_exp_f32_fast(float %in) { -; GCN-LABEL: v_exp_f32_fast: -; GCN: ; %bb.0: -; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 -; GCN-NEXT: v_exp_f32_e32 v0, v0 -; GCN-NEXT: s_setpc_b64 s[30:31] +; VI-SDAG-LABEL: v_exp_f32_fast: +; VI-SDAG: ; %bb.0: +; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 +; VI-SDAG-NEXT: v_sub_f32_e32 v4, v0, v1 +; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v1 +; VI-SDAG-NEXT: v_mul_f32_e32 v5, 0x39a3b295, v4 +; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3fb8a000, v4 +; VI-SDAG-NEXT: v_rndne_f32_e32 v3, v2 +; VI-SDAG-NEXT: v_add_f32_e32 v4, v4, v5 +; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1 +; VI-SDAG-NEXT: v_sub_f32_e32 v2, v2, v3 +; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v4 +; VI-SDAG-NEXT: v_add_f32_e32 v1, v2, v1 +; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1 +; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v3 +; VI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0 +; VI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 +; VI-SDAG-NEXT: v_ldexp_f32 v1, v1, v2 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc +; VI-SDAG-NEXT: s_setpc_b64 s[30:31] ; -; SI-LABEL: v_exp_f32_fast: -; SI: ; %bb.0: -; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 -; SI-NEXT: v_exp_f32_e32 v0, v0 -; SI-NEXT: s_setpc_b64 s[30:31] +; VI-GISEL-LABEL: v_exp_f32_fast: +; VI-GISEL: ; %bb.0: +; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 +; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1 +; VI-GISEL-NEXT: 
v_mul_f32_e32 v4, 0x39a3b295, v2 +; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v2 +; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3fb8a000, v1 +; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v4 +; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1 +; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 +; VI-GISEL-NEXT: v_rndne_f32_e32 v2, v3 +; VI-GISEL-NEXT: v_sub_f32_e32 v3, v3, v2 +; VI-GISEL-NEXT: v_add_f32_e32 v1, v3, v1 +; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v2 +; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1 +; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 +; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 +; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 +; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc +; VI-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-SDAG-LABEL: v_exp_f32_fast: +; GFX900-SDAG: ; %bb.0: +; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b +; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0 +; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1 +; GFX900-SDAG-NEXT: s_mov_b32 s5, 0x32a5705f +; GFX900-SDAG-NEXT: v_rndne_f32_e32 v1, v1 +; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s5, v2 +; GFX900-SDAG-NEXT: v_fma_f32 v3, v0, s4, -v1 +; GFX900-SDAG-NEXT: v_add_f32_e32 v2, v3, v2 +; GFX900-SDAG-NEXT: v_exp_f32_e32 v2, v2 +; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v1, v1 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0 +; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 +; GFX900-SDAG-NEXT: v_ldexp_f32 v1, v2, v1 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc +; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-GISEL-LABEL: v_exp_f32_fast: +; GFX900-GISEL: ; %bb.0: +; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-GISEL-NEXT: s_mov_b32 s4, 0x3fb8aa3b +; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0 +; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f +; GFX900-GISEL-NEXT: v_rndne_f32_e32 v1, v1 +; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2 +; GFX900-GISEL-NEXT: v_fma_f32 v3, v0, 
s4, -v1 +; GFX900-GISEL-NEXT: v_add_f32_e32 v2, v3, v2 +; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v1, v1 +; GFX900-GISEL-NEXT: v_exp_f32_e32 v2, v2 +; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v2, v1 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 +; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 +; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc +; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; SI-SDAG-LABEL: v_exp_f32_fast: +; SI-SDAG: ; %bb.0: +; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b +; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0 +; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1 +; SI-SDAG-NEXT: s_mov_b32 s5, 0x32a5705f +; SI-SDAG-NEXT: v_rndne_f32_e32 v1, v1 +; SI-SDAG-NEXT: v_fma_f32 v2, v0, s5, v2 +; SI-SDAG-NEXT: v_fma_f32 v3, v0, s4, -v1 +; SI-SDAG-NEXT: v_add_f32_e32 v2, v3, v2 +; SI-SDAG-NEXT: v_exp_f32_e32 v2, v2 +; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v1, v1 +; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0 +; SI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 +; SI-SDAG-NEXT: v_ldexp_f32_e32 v1, v2, v1 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc +; SI-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; SI-GISEL-LABEL: v_exp_f32_fast: +; SI-GISEL: ; %bb.0: +; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-GISEL-NEXT: s_mov_b32 s4, 0x3fb8aa3b +; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0 +; SI-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1 +; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f +; SI-GISEL-NEXT: v_rndne_f32_e32 v1, v1 +; SI-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2 +; SI-GISEL-NEXT: v_fma_f32 v3, v0, s4, -v1 +; SI-GISEL-NEXT: v_add_f32_e32 v2, v3, v2 +; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v1, v1 +; SI-GISEL-NEXT: v_exp_f32_e32 v2, v2 +; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v2, v1 +; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 +; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 +; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc +; SI-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_exp_f32_fast: ; R600: ; %bb.0: @@ -1446,19 
+3346,149 @@ } define float @v_exp_f32_unsafe_math_attr(float %in) "unsafe-fp-math"="true" { -; GCN-LABEL: v_exp_f32_unsafe_math_attr: -; GCN: ; %bb.0: -; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 -; GCN-NEXT: v_exp_f32_e32 v0, v0 -; GCN-NEXT: s_setpc_b64 s[30:31] +; VI-SDAG-LABEL: v_exp_f32_unsafe_math_attr: +; VI-SDAG: ; %bb.0: +; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 +; VI-SDAG-NEXT: v_sub_f32_e32 v4, v0, v1 +; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v1 +; VI-SDAG-NEXT: v_mul_f32_e32 v5, 0x39a3b295, v4 +; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3fb8a000, v4 +; VI-SDAG-NEXT: v_rndne_f32_e32 v3, v2 +; VI-SDAG-NEXT: v_add_f32_e32 v4, v4, v5 +; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1 +; VI-SDAG-NEXT: v_sub_f32_e32 v2, v2, v3 +; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v4 +; VI-SDAG-NEXT: v_add_f32_e32 v1, v2, v1 +; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1 +; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v3 +; VI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0 +; VI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 +; VI-SDAG-NEXT: s_mov_b32 s4, 0x42b17218 +; VI-SDAG-NEXT: v_ldexp_f32 v1, v1, v2 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000 +; VI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc +; VI-SDAG-NEXT: s_setpc_b64 s[30:31] ; -; SI-LABEL: v_exp_f32_unsafe_math_attr: -; SI: ; %bb.0: -; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 -; SI-NEXT: v_exp_f32_e32 v0, v0 -; SI-NEXT: s_setpc_b64 s[30:31] +; VI-GISEL-LABEL: v_exp_f32_unsafe_math_attr: +; VI-GISEL: ; %bb.0: +; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 +; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1 +; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x39a3b295, v2 +; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v2 +; VI-GISEL-NEXT: 
v_mul_f32_e32 v3, 0x3fb8a000, v1 +; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v4 +; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1 +; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 +; VI-GISEL-NEXT: v_rndne_f32_e32 v2, v3 +; VI-GISEL-NEXT: v_sub_f32_e32 v3, v3, v2 +; VI-GISEL-NEXT: v_add_f32_e32 v1, v3, v1 +; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v2 +; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1 +; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 +; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 +; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 +; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 +; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218 +; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc +; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc +; VI-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-SDAG-LABEL: v_exp_f32_unsafe_math_attr: +; GFX900-SDAG: ; %bb.0: +; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b +; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0 +; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1 +; GFX900-SDAG-NEXT: s_mov_b32 s5, 0x32a5705f +; GFX900-SDAG-NEXT: v_rndne_f32_e32 v1, v1 +; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s5, v2 +; GFX900-SDAG-NEXT: v_fma_f32 v3, v0, s4, -v1 +; GFX900-SDAG-NEXT: v_add_f32_e32 v2, v3, v2 +; GFX900-SDAG-NEXT: v_exp_f32_e32 v2, v2 +; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v1, v1 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0 +; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x42b17218 +; GFX900-SDAG-NEXT: v_ldexp_f32 v1, v2, v1 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000 +; GFX900-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc +; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-GISEL-LABEL: v_exp_f32_unsafe_math_attr: +; GFX900-GISEL: ; %bb.0: +; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; 
GFX900-GISEL-NEXT: s_mov_b32 s4, 0x3fb8aa3b +; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0 +; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f +; GFX900-GISEL-NEXT: v_rndne_f32_e32 v1, v1 +; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2 +; GFX900-GISEL-NEXT: v_fma_f32 v3, v0, s4, -v1 +; GFX900-GISEL-NEXT: v_add_f32_e32 v2, v3, v2 +; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v1, v1 +; GFX900-GISEL-NEXT: v_exp_f32_e32 v2, v2 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 +; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v2, v1 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 +; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218 +; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc +; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc +; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; SI-SDAG-LABEL: v_exp_f32_unsafe_math_attr: +; SI-SDAG: ; %bb.0: +; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b +; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0 +; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1 +; SI-SDAG-NEXT: s_mov_b32 s5, 0x32a5705f +; SI-SDAG-NEXT: v_rndne_f32_e32 v1, v1 +; SI-SDAG-NEXT: v_fma_f32 v2, v0, s5, v2 +; SI-SDAG-NEXT: v_fma_f32 v3, v0, s4, -v1 +; SI-SDAG-NEXT: v_add_f32_e32 v2, v3, v2 +; SI-SDAG-NEXT: v_exp_f32_e32 v2, v2 +; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v1, v1 +; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0 +; SI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 +; SI-SDAG-NEXT: s_mov_b32 s4, 0x42b17218 +; SI-SDAG-NEXT: v_ldexp_f32_e32 v1, v2, v1 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000 +; SI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc +; SI-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; SI-GISEL-LABEL: v_exp_f32_unsafe_math_attr: +; SI-GISEL: ; %bb.0: +; SI-GISEL-NEXT: s_waitcnt vmcnt(0) 
expcnt(0) lgkmcnt(0) +; SI-GISEL-NEXT: s_mov_b32 s4, 0x3fb8aa3b +; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0 +; SI-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1 +; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f +; SI-GISEL-NEXT: v_rndne_f32_e32 v1, v1 +; SI-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2 +; SI-GISEL-NEXT: v_fma_f32 v3, v0, s4, -v1 +; SI-GISEL-NEXT: v_add_f32_e32 v2, v3, v2 +; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v1, v1 +; SI-GISEL-NEXT: v_exp_f32_e32 v2, v2 +; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 +; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v2, v1 +; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 +; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 +; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218 +; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc +; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc +; SI-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_exp_f32_unsafe_math_attr: ; R600: ; %bb.0: @@ -1474,19 +3504,149 @@ } define float @v_exp_f32_approx_fn_attr(float %in) "approx-func-fp-math"="true" { -; GCN-LABEL: v_exp_f32_approx_fn_attr: -; GCN: ; %bb.0: -; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 -; GCN-NEXT: v_exp_f32_e32 v0, v0 -; GCN-NEXT: s_setpc_b64 s[30:31] +; VI-SDAG-LABEL: v_exp_f32_approx_fn_attr: +; VI-SDAG: ; %bb.0: +; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 +; VI-SDAG-NEXT: v_sub_f32_e32 v4, v0, v1 +; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v1 +; VI-SDAG-NEXT: v_mul_f32_e32 v5, 0x39a3b295, v4 +; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3fb8a000, v4 +; VI-SDAG-NEXT: v_rndne_f32_e32 v3, v2 +; VI-SDAG-NEXT: v_add_f32_e32 v4, v4, v5 +; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1 +; VI-SDAG-NEXT: v_sub_f32_e32 v2, v2, v3 +; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v4 +; VI-SDAG-NEXT: v_add_f32_e32 v1, v2, v1 +; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1 +; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v3 +; VI-SDAG-NEXT: 
s_mov_b32 s4, 0xc2ce8ed0 +; VI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 +; VI-SDAG-NEXT: s_mov_b32 s4, 0x42b17218 +; VI-SDAG-NEXT: v_ldexp_f32 v1, v1, v2 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000 +; VI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc +; VI-SDAG-NEXT: s_setpc_b64 s[30:31] ; -; SI-LABEL: v_exp_f32_approx_fn_attr: -; SI: ; %bb.0: -; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 -; SI-NEXT: v_exp_f32_e32 v0, v0 -; SI-NEXT: s_setpc_b64 s[30:31] +; VI-GISEL-LABEL: v_exp_f32_approx_fn_attr: +; VI-GISEL: ; %bb.0: +; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 +; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1 +; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x39a3b295, v2 +; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v2 +; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3fb8a000, v1 +; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v4 +; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1 +; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 +; VI-GISEL-NEXT: v_rndne_f32_e32 v2, v3 +; VI-GISEL-NEXT: v_sub_f32_e32 v3, v3, v2 +; VI-GISEL-NEXT: v_add_f32_e32 v1, v3, v1 +; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v2 +; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1 +; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 +; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 +; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 +; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 +; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218 +; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc +; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc +; VI-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-SDAG-LABEL: v_exp_f32_approx_fn_attr: +; GFX900-SDAG: ; %bb.0: +; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b +; GFX900-SDAG-NEXT: 
v_rndne_f32_e32 v2, v1 +; GFX900-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2 +; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x32a5705f +; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1 +; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v3, v1 +; GFX900-SDAG-NEXT: v_exp_f32_e32 v1, v1 +; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0 +; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x42b17218 +; GFX900-SDAG-NEXT: v_ldexp_f32 v1, v1, v2 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000 +; GFX900-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc +; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-GISEL-LABEL: v_exp_f32_approx_fn_attr: +; GFX900-GISEL: ; %bb.0: +; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-GISEL-NEXT: s_mov_b32 s4, 0x3fb8aa3b +; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0 +; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f +; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2 +; GFX900-GISEL-NEXT: v_rndne_f32_e32 v3, v1 +; GFX900-GISEL-NEXT: v_sub_f32_e32 v1, v1, v3 +; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 +; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3 +; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 +; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 +; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218 +; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc +; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc +; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; SI-SDAG-LABEL: v_exp_f32_approx_fn_attr: +; SI-SDAG: ; %bb.0: +; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-SDAG-NEXT: 
v_mul_f32_e32 v1, 0x3fb8aa3b, v0 +; SI-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b +; SI-SDAG-NEXT: v_rndne_f32_e32 v2, v1 +; SI-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2 +; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1 +; SI-SDAG-NEXT: s_mov_b32 s4, 0x32a5705f +; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1 +; SI-SDAG-NEXT: v_add_f32_e32 v1, v3, v1 +; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1 +; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2 +; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0 +; SI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 +; SI-SDAG-NEXT: s_mov_b32 s4, 0x42b17218 +; SI-SDAG-NEXT: v_ldexp_f32_e32 v1, v1, v2 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000 +; SI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc +; SI-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; SI-GISEL-LABEL: v_exp_f32_approx_fn_attr: +; SI-GISEL: ; %bb.0: +; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-GISEL-NEXT: s_mov_b32 s4, 0x3fb8aa3b +; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0 +; SI-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1 +; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f +; SI-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2 +; SI-GISEL-NEXT: v_rndne_f32_e32 v3, v1 +; SI-GISEL-NEXT: v_sub_f32_e32 v1, v1, v3 +; SI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 +; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3 +; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1 +; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 +; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2 +; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 +; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 +; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218 +; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc +; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc +; SI-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_exp_f32_approx_fn_attr: ; R600: ; %bb.0: @@ -1502,64 +3662,124 @@ } define float @v_exp_f32_ninf(float %in) { -; GCN-SDAG-LABEL: v_exp_f32_ninf: -; GCN-SDAG: ; %bb.0: -; 
GCN-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 -; GCN-SDAG-NEXT: s_mov_b32 s4, 0xc2fc0000 -; GCN-SDAG-NEXT: v_mov_b32_e32 v1, 0x42800000 -; GCN-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 -; GCN-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc -; GCN-SDAG-NEXT: v_add_f32_e32 v0, v0, v1 -; GCN-SDAG-NEXT: v_exp_f32_e32 v0, v0 -; GCN-SDAG-NEXT: v_mov_b32_e32 v1, 0x1f800000 -; GCN-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; GCN-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 -; GCN-SDAG-NEXT: s_setpc_b64 s[30:31] +; VI-SDAG-LABEL: v_exp_f32_ninf: +; VI-SDAG: ; %bb.0: +; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 +; VI-SDAG-NEXT: v_sub_f32_e32 v4, v0, v1 +; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v1 +; VI-SDAG-NEXT: v_mul_f32_e32 v5, 0x39a3b295, v4 +; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3fb8a000, v4 +; VI-SDAG-NEXT: v_rndne_f32_e32 v3, v2 +; VI-SDAG-NEXT: v_add_f32_e32 v4, v4, v5 +; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1 +; VI-SDAG-NEXT: v_sub_f32_e32 v2, v2, v3 +; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v4 +; VI-SDAG-NEXT: v_add_f32_e32 v1, v2, v1 +; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1 +; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v3 +; VI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0 +; VI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 +; VI-SDAG-NEXT: v_ldexp_f32 v1, v1, v2 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc +; VI-SDAG-NEXT: s_setpc_b64 s[30:31] ; -; GCN-GISEL-LABEL: v_exp_f32_ninf: -; GCN-GISEL: ; %bb.0: -; GCN-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 -; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000 -; GCN-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000 -; GCN-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 -; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc -; GCN-GISEL-NEXT: v_add_f32_e32 v0, v0, v1 -; GCN-GISEL-NEXT: v_exp_f32_e32 v0, v0 -; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0x1f800000 -; GCN-GISEL-NEXT: 
v_cndmask_b32_e32 v1, 1.0, v1, vcc -; GCN-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 -; GCN-GISEL-NEXT: s_setpc_b64 s[30:31] +; VI-GISEL-LABEL: v_exp_f32_ninf: +; VI-GISEL: ; %bb.0: +; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 +; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1 +; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x39a3b295, v2 +; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v2 +; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3fb8a000, v1 +; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v4 +; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1 +; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 +; VI-GISEL-NEXT: v_rndne_f32_e32 v2, v3 +; VI-GISEL-NEXT: v_sub_f32_e32 v3, v3, v2 +; VI-GISEL-NEXT: v_add_f32_e32 v1, v3, v1 +; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v2 +; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1 +; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 +; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 +; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 +; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc +; VI-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-SDAG-LABEL: v_exp_f32_ninf: +; GFX900-SDAG: ; %bb.0: +; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b +; GFX900-SDAG-NEXT: v_rndne_f32_e32 v2, v1 +; GFX900-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2 +; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x32a5705f +; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1 +; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v3, v1 +; GFX900-SDAG-NEXT: v_exp_f32_e32 v1, v1 +; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0 +; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 +; GFX900-SDAG-NEXT: v_ldexp_f32 v1, v1, v2 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc +; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-GISEL-LABEL: v_exp_f32_ninf: +; GFX900-GISEL: ; %bb.0: +; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) 
lgkmcnt(0) +; GFX900-GISEL-NEXT: s_mov_b32 s4, 0x3fb8aa3b +; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0 +; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f +; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2 +; GFX900-GISEL-NEXT: v_rndne_f32_e32 v3, v1 +; GFX900-GISEL-NEXT: v_sub_f32_e32 v1, v1, v3 +; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 +; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3 +; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1 +; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 +; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 +; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc +; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; SI-SDAG-LABEL: v_exp_f32_ninf: ; SI-SDAG: ; %bb.0: ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 -; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2fc0000 -; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x42800000 -; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 -; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc -; SI-SDAG-NEXT: v_add_f32_e32 v0, v0, v1 -; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0 -; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x1f800000 -; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; SI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 +; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0 +; SI-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b +; SI-SDAG-NEXT: v_rndne_f32_e32 v2, v1 +; SI-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2 +; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1 +; SI-SDAG-NEXT: s_mov_b32 s4, 0x32a5705f +; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1 +; SI-SDAG-NEXT: v_add_f32_e32 v1, v3, v1 +; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1 +; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2 +; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0 +; SI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 +; SI-SDAG-NEXT: v_ldexp_f32_e32 v1, v1, v2 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc ; SI-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; SI-GISEL-LABEL: v_exp_f32_ninf: ; SI-GISEL: ; %bb.0: ; 
SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 -; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000 -; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000 -; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 -; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc -; SI-GISEL-NEXT: v_add_f32_e32 v0, v0, v1 -; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0 -; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x1f800000 -; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; SI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 +; SI-GISEL-NEXT: s_mov_b32 s4, 0x3fb8aa3b +; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0 +; SI-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1 +; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f +; SI-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2 +; SI-GISEL-NEXT: v_rndne_f32_e32 v3, v1 +; SI-GISEL-NEXT: v_sub_f32_e32 v1, v1, v3 +; SI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 +; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3 +; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1 +; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2 +; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 +; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 +; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc ; SI-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_exp_f32_ninf: @@ -1576,19 +3796,149 @@ } define float @v_exp_f32_afn(float %in) { -; GCN-LABEL: v_exp_f32_afn: -; GCN: ; %bb.0: -; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 -; GCN-NEXT: v_exp_f32_e32 v0, v0 -; GCN-NEXT: s_setpc_b64 s[30:31] +; VI-SDAG-LABEL: v_exp_f32_afn: +; VI-SDAG: ; %bb.0: +; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 +; VI-SDAG-NEXT: v_sub_f32_e32 v4, v0, v1 +; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v1 +; VI-SDAG-NEXT: v_mul_f32_e32 v5, 0x39a3b295, v4 +; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3fb8a000, v4 +; VI-SDAG-NEXT: v_rndne_f32_e32 v3, v2 +; VI-SDAG-NEXT: v_add_f32_e32 v4, v4, v5 +; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1 +; 
VI-SDAG-NEXT: v_sub_f32_e32 v2, v2, v3 +; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v4 +; VI-SDAG-NEXT: v_add_f32_e32 v1, v2, v1 +; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1 +; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v3 +; VI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0 +; VI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 +; VI-SDAG-NEXT: s_mov_b32 s4, 0x42b17218 +; VI-SDAG-NEXT: v_ldexp_f32 v1, v1, v2 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000 +; VI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc +; VI-SDAG-NEXT: s_setpc_b64 s[30:31] ; -; SI-LABEL: v_exp_f32_afn: -; SI: ; %bb.0: -; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 -; SI-NEXT: v_exp_f32_e32 v0, v0 -; SI-NEXT: s_setpc_b64 s[30:31] +; VI-GISEL-LABEL: v_exp_f32_afn: +; VI-GISEL: ; %bb.0: +; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 +; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1 +; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x39a3b295, v2 +; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v2 +; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3fb8a000, v1 +; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v4 +; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1 +; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 +; VI-GISEL-NEXT: v_rndne_f32_e32 v2, v3 +; VI-GISEL-NEXT: v_sub_f32_e32 v3, v3, v2 +; VI-GISEL-NEXT: v_add_f32_e32 v1, v3, v1 +; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v2 +; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1 +; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 +; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 +; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 +; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 +; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218 +; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc +; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc +; VI-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-SDAG-LABEL: v_exp_f32_afn: +; GFX900-SDAG: ; 
%bb.0: +; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b +; GFX900-SDAG-NEXT: v_rndne_f32_e32 v2, v1 +; GFX900-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2 +; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x32a5705f +; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1 +; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v3, v1 +; GFX900-SDAG-NEXT: v_exp_f32_e32 v1, v1 +; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0 +; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x42b17218 +; GFX900-SDAG-NEXT: v_ldexp_f32 v1, v1, v2 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000 +; GFX900-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc +; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-GISEL-LABEL: v_exp_f32_afn: +; GFX900-GISEL: ; %bb.0: +; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-GISEL-NEXT: s_mov_b32 s4, 0x3fb8aa3b +; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0 +; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f +; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2 +; GFX900-GISEL-NEXT: v_rndne_f32_e32 v3, v1 +; GFX900-GISEL-NEXT: v_sub_f32_e32 v1, v1, v3 +; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 +; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3 +; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 +; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 +; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218 +; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc +; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc +; 
GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; SI-SDAG-LABEL: v_exp_f32_afn: +; SI-SDAG: ; %bb.0: +; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0 +; SI-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b +; SI-SDAG-NEXT: v_rndne_f32_e32 v2, v1 +; SI-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2 +; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1 +; SI-SDAG-NEXT: s_mov_b32 s4, 0x32a5705f +; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1 +; SI-SDAG-NEXT: v_add_f32_e32 v1, v3, v1 +; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1 +; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2 +; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0 +; SI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 +; SI-SDAG-NEXT: s_mov_b32 s4, 0x42b17218 +; SI-SDAG-NEXT: v_ldexp_f32_e32 v1, v1, v2 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000 +; SI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc +; SI-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; SI-GISEL-LABEL: v_exp_f32_afn: +; SI-GISEL: ; %bb.0: +; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-GISEL-NEXT: s_mov_b32 s4, 0x3fb8aa3b +; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0 +; SI-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1 +; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f +; SI-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2 +; SI-GISEL-NEXT: v_rndne_f32_e32 v3, v1 +; SI-GISEL-NEXT: v_sub_f32_e32 v1, v1, v3 +; SI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 +; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3 +; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1 +; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 +; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2 +; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 +; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 +; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218 +; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc +; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc +; SI-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_exp_f32_afn: ; R600: 
; %bb.0: @@ -1632,19 +3982,147 @@ } define float @v_exp_f32_afn_dynamic(float %in) #1 { -; GCN-LABEL: v_exp_f32_afn_dynamic: -; GCN: ; %bb.0: -; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 -; GCN-NEXT: v_exp_f32_e32 v0, v0 -; GCN-NEXT: s_setpc_b64 s[30:31] +; VI-SDAG-LABEL: v_exp_f32_afn_dynamic: +; VI-SDAG: ; %bb.0: +; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 +; VI-SDAG-NEXT: v_sub_f32_e32 v4, v0, v1 +; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v1 +; VI-SDAG-NEXT: v_mul_f32_e32 v5, 0x39a3b295, v4 +; VI-SDAG-NEXT: s_mov_b32 s4, 0x3fb8a000 +; VI-SDAG-NEXT: v_rndne_f32_e32 v2, v2 +; VI-SDAG-NEXT: v_mac_f32_e32 v5, 0x3fb8a000, v4 +; VI-SDAG-NEXT: v_mad_f32 v3, v1, s4, -v2 +; VI-SDAG-NEXT: v_mac_f32_e32 v5, 0x39a3b295, v1 +; VI-SDAG-NEXT: v_add_f32_e32 v1, v3, v5 +; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1 +; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2 +; VI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0 +; VI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 +; VI-SDAG-NEXT: s_mov_b32 s4, 0x42b17218 +; VI-SDAG-NEXT: v_ldexp_f32 v1, v1, v2 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000 +; VI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc +; VI-SDAG-NEXT: s_setpc_b64 s[30:31] ; -; SI-LABEL: v_exp_f32_afn_dynamic: -; SI: ; %bb.0: -; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 -; SI-NEXT: v_exp_f32_e32 v0, v0 -; SI-NEXT: s_setpc_b64 s[30:31] +; VI-GISEL-LABEL: v_exp_f32_afn_dynamic: +; VI-GISEL: ; %bb.0: +; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 +; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1 +; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3fb8a000, v1 +; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x39a3b295, v2 +; VI-GISEL-NEXT: s_mov_b32 s4, 0x3fb8a000 +; VI-GISEL-NEXT: v_mac_f32_e32 v4, 0x3fb8a000, v2 
+; VI-GISEL-NEXT: v_rndne_f32_e32 v2, v3 +; VI-GISEL-NEXT: v_mac_f32_e32 v4, 0x39a3b295, v1 +; VI-GISEL-NEXT: v_mad_f32 v1, v1, s4, -v2 +; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v4 +; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v2 +; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1 +; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 +; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 +; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 +; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 +; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218 +; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc +; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc +; VI-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-SDAG-LABEL: v_exp_f32_afn_dynamic: +; GFX900-SDAG: ; %bb.0: +; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b +; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0 +; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1 +; GFX900-SDAG-NEXT: s_mov_b32 s5, 0x32a5705f +; GFX900-SDAG-NEXT: v_rndne_f32_e32 v1, v1 +; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s5, v2 +; GFX900-SDAG-NEXT: v_mad_f32 v3, v0, s4, -v1 +; GFX900-SDAG-NEXT: v_add_f32_e32 v2, v3, v2 +; GFX900-SDAG-NEXT: v_exp_f32_e32 v2, v2 +; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v1, v1 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0 +; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x42b17218 +; GFX900-SDAG-NEXT: v_ldexp_f32 v1, v2, v1 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000 +; GFX900-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc +; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-GISEL-LABEL: v_exp_f32_afn_dynamic: +; GFX900-GISEL: ; %bb.0: +; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-GISEL-NEXT: s_mov_b32 s4, 0x3fb8aa3b +; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0 +; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, s4, 
-v1 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f +; GFX900-GISEL-NEXT: v_rndne_f32_e32 v1, v1 +; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2 +; GFX900-GISEL-NEXT: v_mad_f32 v3, v0, s4, -v1 +; GFX900-GISEL-NEXT: v_add_f32_e32 v2, v3, v2 +; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v1, v1 +; GFX900-GISEL-NEXT: v_exp_f32_e32 v2, v2 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 +; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v2, v1 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 +; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218 +; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc +; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc +; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; SI-SDAG-LABEL: v_exp_f32_afn_dynamic: +; SI-SDAG: ; %bb.0: +; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b +; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0 +; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1 +; SI-SDAG-NEXT: s_mov_b32 s5, 0x32a5705f +; SI-SDAG-NEXT: v_rndne_f32_e32 v1, v1 +; SI-SDAG-NEXT: v_fma_f32 v2, v0, s5, v2 +; SI-SDAG-NEXT: v_mad_f32 v3, v0, s4, -v1 +; SI-SDAG-NEXT: v_add_f32_e32 v2, v3, v2 +; SI-SDAG-NEXT: v_exp_f32_e32 v2, v2 +; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v1, v1 +; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0 +; SI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 +; SI-SDAG-NEXT: s_mov_b32 s4, 0x42b17218 +; SI-SDAG-NEXT: v_ldexp_f32_e32 v1, v2, v1 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000 +; SI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc +; SI-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; SI-GISEL-LABEL: v_exp_f32_afn_dynamic: +; SI-GISEL: ; %bb.0: +; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-GISEL-NEXT: s_mov_b32 s4, 0x3fb8aa3b +; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0 +; SI-GISEL-NEXT: v_fma_f32 v2, v0, s4, 
-v1 +; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f +; SI-GISEL-NEXT: v_rndne_f32_e32 v1, v1 +; SI-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2 +; SI-GISEL-NEXT: v_mad_f32 v3, v0, s4, -v1 +; SI-GISEL-NEXT: v_add_f32_e32 v2, v3, v2 +; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v1, v1 +; SI-GISEL-NEXT: v_exp_f32_e32 v2, v2 +; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 +; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v2, v1 +; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 +; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 +; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218 +; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc +; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc +; SI-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_exp_f32_afn_dynamic: ; R600: ; %bb.0: @@ -1660,36 +4138,150 @@ } define float @v_fabs_exp_f32_afn(float %in) { -; GCN-SDAG-LABEL: v_fabs_exp_f32_afn: -; GCN-SDAG: ; %bb.0: -; GCN-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b -; GCN-SDAG-NEXT: v_mul_f32_e64 v0, |v0|, s4 -; GCN-SDAG-NEXT: v_exp_f32_e32 v0, v0 -; GCN-SDAG-NEXT: s_setpc_b64 s[30:31] +; VI-SDAG-LABEL: v_fabs_exp_f32_afn: +; VI-SDAG: ; %bb.0: +; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-SDAG-NEXT: v_and_b32_e32 v1, 0x7fffffff, v0 +; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v1 +; VI-SDAG-NEXT: v_sub_f32_e64 v4, |v0|, v1 +; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v1 +; VI-SDAG-NEXT: v_mul_f32_e32 v5, 0x39a3b295, v4 +; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3fb8a000, v4 +; VI-SDAG-NEXT: v_rndne_f32_e32 v3, v2 +; VI-SDAG-NEXT: v_add_f32_e32 v4, v4, v5 +; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1 +; VI-SDAG-NEXT: v_sub_f32_e32 v2, v2, v3 +; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v4 +; VI-SDAG-NEXT: v_add_f32_e32 v1, v2, v1 +; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1 +; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v3 +; VI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0 +; VI-SDAG-NEXT: v_cmp_nlt_f32_e64 vcc, |v0|, s4 +; VI-SDAG-NEXT: 
s_mov_b32 s4, 0x42b17218 +; VI-SDAG-NEXT: v_ldexp_f32 v1, v1, v2 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000 +; VI-SDAG-NEXT: v_cmp_ngt_f32_e64 vcc, |v0|, s4 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc +; VI-SDAG-NEXT: s_setpc_b64 s[30:31] ; -; GCN-GISEL-LABEL: v_fabs_exp_f32_afn: -; GCN-GISEL: ; %bb.0: -; GCN-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0x3fb8aa3b -; GCN-GISEL-NEXT: v_mul_f32_e64 v0, |v0|, v1 -; GCN-GISEL-NEXT: v_exp_f32_e32 v0, v0 -; GCN-GISEL-NEXT: s_setpc_b64 s[30:31] +; VI-GISEL-LABEL: v_fabs_exp_f32_afn: +; VI-GISEL: ; %bb.0: +; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-GISEL-NEXT: v_and_b32_e32 v1, 0x7fffffff, v0 +; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v1 +; VI-GISEL-NEXT: v_sub_f32_e64 v2, |v0|, v1 +; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x39a3b295, v2 +; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v2 +; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3fb8a000, v1 +; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v4 +; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1 +; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 +; VI-GISEL-NEXT: v_rndne_f32_e32 v2, v3 +; VI-GISEL-NEXT: v_sub_f32_e32 v3, v3, v2 +; VI-GISEL-NEXT: v_add_f32_e32 v1, v3, v1 +; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v2 +; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1 +; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 +; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 +; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 +; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2 +; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218 +; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, s[4:5] +; VI-GISEL-NEXT: v_cmp_gt_f32_e64 vcc, |v0|, v2 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc +; VI-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-SDAG-LABEL: v_fabs_exp_f32_afn: +; GFX900-SDAG: ; %bb.0: +; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b +; GFX900-SDAG-NEXT: 
v_mul_f32_e64 v1, |v0|, s4 +; GFX900-SDAG-NEXT: v_rndne_f32_e32 v2, v1 +; GFX900-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2 +; GFX900-SDAG-NEXT: v_fma_f32 v1, |v0|, s4, -v1 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x32a5705f +; GFX900-SDAG-NEXT: v_fma_f32 v1, |v0|, s4, v1 +; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v3, v1 +; GFX900-SDAG-NEXT: v_exp_f32_e32 v1, v1 +; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0 +; GFX900-SDAG-NEXT: v_cmp_nlt_f32_e64 vcc, |v0|, s4 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x42b17218 +; GFX900-SDAG-NEXT: v_ldexp_f32 v1, v1, v2 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000 +; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e64 vcc, |v0|, s4 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc +; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-GISEL-LABEL: v_fabs_exp_f32_afn: +; GFX900-GISEL: ; %bb.0: +; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-GISEL-NEXT: s_mov_b32 s4, 0x3fb8aa3b +; GFX900-GISEL-NEXT: v_mul_f32_e64 v1, |v0|, s4 +; GFX900-GISEL-NEXT: v_fma_f32 v2, |v0|, s4, -v1 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f +; GFX900-GISEL-NEXT: v_fma_f32 v2, |v0|, v3, v2 +; GFX900-GISEL-NEXT: v_rndne_f32_e32 v3, v1 +; GFX900-GISEL-NEXT: v_sub_f32_e32 v1, v1, v3 +; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 +; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3 +; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 +; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 +; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218 +; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, s[4:5] +; GFX900-GISEL-NEXT: v_cmp_gt_f32_e64 vcc, |v0|, v2 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc +; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; SI-SDAG-LABEL: v_fabs_exp_f32_afn: ; SI-SDAG: ; %bb.0: ; SI-SDAG-NEXT: s_waitcnt 
vmcnt(0) expcnt(0) lgkmcnt(0) ; SI-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b -; SI-SDAG-NEXT: v_mul_f32_e64 v0, |v0|, s4 -; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0 +; SI-SDAG-NEXT: v_mul_f32_e64 v1, |v0|, s4 +; SI-SDAG-NEXT: v_rndne_f32_e32 v2, v1 +; SI-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2 +; SI-SDAG-NEXT: v_fma_f32 v1, |v0|, s4, -v1 +; SI-SDAG-NEXT: s_mov_b32 s4, 0x32a5705f +; SI-SDAG-NEXT: v_fma_f32 v1, |v0|, s4, v1 +; SI-SDAG-NEXT: v_add_f32_e32 v1, v3, v1 +; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1 +; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2 +; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0 +; SI-SDAG-NEXT: v_cmp_nlt_f32_e64 vcc, |v0|, s4 +; SI-SDAG-NEXT: s_mov_b32 s4, 0x42b17218 +; SI-SDAG-NEXT: v_ldexp_f32_e32 v1, v1, v2 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000 +; SI-SDAG-NEXT: v_cmp_ngt_f32_e64 vcc, |v0|, s4 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc ; SI-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; SI-GISEL-LABEL: v_fabs_exp_f32_afn: ; SI-GISEL: ; %bb.0: ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x3fb8aa3b -; SI-GISEL-NEXT: v_mul_f32_e64 v0, |v0|, v1 -; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0 +; SI-GISEL-NEXT: s_mov_b32 s4, 0x3fb8aa3b +; SI-GISEL-NEXT: v_mul_f32_e64 v1, |v0|, s4 +; SI-GISEL-NEXT: v_fma_f32 v2, |v0|, s4, -v1 +; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f +; SI-GISEL-NEXT: v_fma_f32 v2, |v0|, v3, v2 +; SI-GISEL-NEXT: v_rndne_f32_e32 v3, v1 +; SI-GISEL-NEXT: v_sub_f32_e32 v1, v1, v3 +; SI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 +; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3 +; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1 +; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 +; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2 +; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 +; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2 +; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218 +; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, s[4:5] +; SI-GISEL-NEXT: v_cmp_gt_f32_e64 vcc, |v0|, v2 +; SI-GISEL-NEXT: 
v_cndmask_b32_e32 v0, v1, v3, vcc ; SI-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_fabs_exp_f32_afn: @@ -1707,19 +4299,147 @@ } define float @v_exp_f32_daz(float %in) #0 { -; GCN-LABEL: v_exp_f32_daz: -; GCN: ; %bb.0: -; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 -; GCN-NEXT: v_exp_f32_e32 v0, v0 -; GCN-NEXT: s_setpc_b64 s[30:31] +; VI-SDAG-LABEL: v_exp_f32_daz: +; VI-SDAG: ; %bb.0: +; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 +; VI-SDAG-NEXT: v_sub_f32_e32 v4, v0, v1 +; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v1 +; VI-SDAG-NEXT: v_mul_f32_e32 v5, 0x39a3b295, v4 +; VI-SDAG-NEXT: s_mov_b32 s4, 0x3fb8a000 +; VI-SDAG-NEXT: v_rndne_f32_e32 v2, v2 +; VI-SDAG-NEXT: v_mac_f32_e32 v5, 0x3fb8a000, v4 +; VI-SDAG-NEXT: v_mad_f32 v3, v1, s4, -v2 +; VI-SDAG-NEXT: v_mac_f32_e32 v5, 0x39a3b295, v1 +; VI-SDAG-NEXT: v_add_f32_e32 v1, v3, v5 +; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1 +; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2 +; VI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0 +; VI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 +; VI-SDAG-NEXT: s_mov_b32 s4, 0x42b17218 +; VI-SDAG-NEXT: v_ldexp_f32 v1, v1, v2 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000 +; VI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc +; VI-SDAG-NEXT: s_setpc_b64 s[30:31] ; -; SI-LABEL: v_exp_f32_daz: -; SI: ; %bb.0: -; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 -; SI-NEXT: v_exp_f32_e32 v0, v0 -; SI-NEXT: s_setpc_b64 s[30:31] +; VI-GISEL-LABEL: v_exp_f32_daz: +; VI-GISEL: ; %bb.0: +; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 +; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1 +; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3fb8a000, v1 +; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x39a3b295, v2 +; VI-GISEL-NEXT: s_mov_b32 s4, 
0x3fb8a000 +; VI-GISEL-NEXT: v_mac_f32_e32 v4, 0x3fb8a000, v2 +; VI-GISEL-NEXT: v_rndne_f32_e32 v2, v3 +; VI-GISEL-NEXT: v_mac_f32_e32 v4, 0x39a3b295, v1 +; VI-GISEL-NEXT: v_mad_f32 v1, v1, s4, -v2 +; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v4 +; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v2 +; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1 +; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 +; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 +; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 +; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 +; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218 +; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc +; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc +; VI-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-SDAG-LABEL: v_exp_f32_daz: +; GFX900-SDAG: ; %bb.0: +; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b +; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0 +; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1 +; GFX900-SDAG-NEXT: s_mov_b32 s5, 0x32a5705f +; GFX900-SDAG-NEXT: v_rndne_f32_e32 v1, v1 +; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s5, v2 +; GFX900-SDAG-NEXT: v_mad_f32 v3, v0, s4, -v1 +; GFX900-SDAG-NEXT: v_add_f32_e32 v2, v3, v2 +; GFX900-SDAG-NEXT: v_exp_f32_e32 v2, v2 +; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v1, v1 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0 +; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x42b17218 +; GFX900-SDAG-NEXT: v_ldexp_f32 v1, v2, v1 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000 +; GFX900-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc +; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-GISEL-LABEL: v_exp_f32_daz: +; GFX900-GISEL: ; %bb.0: +; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-GISEL-NEXT: s_mov_b32 s4, 0x3fb8aa3b +; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, 
v0 +; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f +; GFX900-GISEL-NEXT: v_rndne_f32_e32 v1, v1 +; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2 +; GFX900-GISEL-NEXT: v_mad_f32 v3, v0, s4, -v1 +; GFX900-GISEL-NEXT: v_add_f32_e32 v2, v3, v2 +; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v1, v1 +; GFX900-GISEL-NEXT: v_exp_f32_e32 v2, v2 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 +; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v2, v1 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 +; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218 +; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc +; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc +; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; SI-SDAG-LABEL: v_exp_f32_daz: +; SI-SDAG: ; %bb.0: +; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b +; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0 +; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1 +; SI-SDAG-NEXT: s_mov_b32 s5, 0x32a5705f +; SI-SDAG-NEXT: v_rndne_f32_e32 v1, v1 +; SI-SDAG-NEXT: v_fma_f32 v2, v0, s5, v2 +; SI-SDAG-NEXT: v_mad_f32 v3, v0, s4, -v1 +; SI-SDAG-NEXT: v_add_f32_e32 v2, v3, v2 +; SI-SDAG-NEXT: v_exp_f32_e32 v2, v2 +; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v1, v1 +; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0 +; SI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 +; SI-SDAG-NEXT: s_mov_b32 s4, 0x42b17218 +; SI-SDAG-NEXT: v_ldexp_f32_e32 v1, v2, v1 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000 +; SI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc +; SI-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; SI-GISEL-LABEL: v_exp_f32_daz: +; SI-GISEL: ; %bb.0: +; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-GISEL-NEXT: s_mov_b32 s4, 0x3fb8aa3b +; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0 +; 
SI-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1 +; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f +; SI-GISEL-NEXT: v_rndne_f32_e32 v1, v1 +; SI-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2 +; SI-GISEL-NEXT: v_mad_f32 v3, v0, s4, -v1 +; SI-GISEL-NEXT: v_add_f32_e32 v2, v3, v2 +; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v1, v1 +; SI-GISEL-NEXT: v_exp_f32_e32 v2, v2 +; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 +; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v2, v1 +; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 +; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 +; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218 +; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc +; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc +; SI-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_exp_f32_daz: ; R600: ; %bb.0: @@ -1735,64 +4455,148 @@ } define float @v_exp_f32_nnan(float %in) { -; GCN-SDAG-LABEL: v_exp_f32_nnan: -; GCN-SDAG: ; %bb.0: -; GCN-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 -; GCN-SDAG-NEXT: s_mov_b32 s4, 0xc2fc0000 -; GCN-SDAG-NEXT: v_mov_b32_e32 v1, 0x42800000 -; GCN-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 -; GCN-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc -; GCN-SDAG-NEXT: v_add_f32_e32 v0, v0, v1 -; GCN-SDAG-NEXT: v_exp_f32_e32 v0, v0 -; GCN-SDAG-NEXT: v_mov_b32_e32 v1, 0x1f800000 -; GCN-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; GCN-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 -; GCN-SDAG-NEXT: s_setpc_b64 s[30:31] +; VI-SDAG-LABEL: v_exp_f32_nnan: +; VI-SDAG: ; %bb.0: +; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 +; VI-SDAG-NEXT: v_sub_f32_e32 v4, v0, v1 +; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v1 +; VI-SDAG-NEXT: v_mul_f32_e32 v5, 0x39a3b295, v4 +; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3fb8a000, v4 +; VI-SDAG-NEXT: v_rndne_f32_e32 v3, v2 +; VI-SDAG-NEXT: v_add_f32_e32 v4, v4, v5 +; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1 +; 
VI-SDAG-NEXT: v_sub_f32_e32 v2, v2, v3 +; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v4 +; VI-SDAG-NEXT: v_add_f32_e32 v1, v2, v1 +; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1 +; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v3 +; VI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0 +; VI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 +; VI-SDAG-NEXT: s_mov_b32 s4, 0x42b17218 +; VI-SDAG-NEXT: v_ldexp_f32 v1, v1, v2 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000 +; VI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc +; VI-SDAG-NEXT: s_setpc_b64 s[30:31] ; -; GCN-GISEL-LABEL: v_exp_f32_nnan: -; GCN-GISEL: ; %bb.0: -; GCN-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 -; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000 -; GCN-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000 -; GCN-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 -; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc -; GCN-GISEL-NEXT: v_add_f32_e32 v0, v0, v1 -; GCN-GISEL-NEXT: v_exp_f32_e32 v0, v0 -; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0x1f800000 -; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; GCN-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 -; GCN-GISEL-NEXT: s_setpc_b64 s[30:31] +; VI-GISEL-LABEL: v_exp_f32_nnan: +; VI-GISEL: ; %bb.0: +; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 +; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1 +; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x39a3b295, v2 +; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v2 +; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3fb8a000, v1 +; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v4 +; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1 +; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 +; VI-GISEL-NEXT: v_rndne_f32_e32 v2, v3 +; VI-GISEL-NEXT: v_sub_f32_e32 v3, v3, v2 +; VI-GISEL-NEXT: v_add_f32_e32 v1, v3, v1 +; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v2 +; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1 +; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 
+; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 +; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 +; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 +; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218 +; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc +; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc +; VI-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-SDAG-LABEL: v_exp_f32_nnan: +; GFX900-SDAG: ; %bb.0: +; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b +; GFX900-SDAG-NEXT: v_rndne_f32_e32 v2, v1 +; GFX900-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2 +; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x32a5705f +; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1 +; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v3, v1 +; GFX900-SDAG-NEXT: v_exp_f32_e32 v1, v1 +; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0 +; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x42b17218 +; GFX900-SDAG-NEXT: v_ldexp_f32 v1, v1, v2 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000 +; GFX900-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc +; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-GISEL-LABEL: v_exp_f32_nnan: +; GFX900-GISEL: ; %bb.0: +; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-GISEL-NEXT: s_mov_b32 s4, 0x3fb8aa3b +; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0 +; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f +; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2 +; GFX900-GISEL-NEXT: v_rndne_f32_e32 v3, v1 +; GFX900-GISEL-NEXT: v_sub_f32_e32 v1, v1, v3 +; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 +; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3 +; GFX900-GISEL-NEXT: v_exp_f32_e32 
v1, v1 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 +; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 +; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218 +; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc +; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc +; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; SI-SDAG-LABEL: v_exp_f32_nnan: ; SI-SDAG: ; %bb.0: ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 -; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2fc0000 -; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x42800000 -; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 +; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0 +; SI-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b +; SI-SDAG-NEXT: v_rndne_f32_e32 v2, v1 +; SI-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2 +; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1 +; SI-SDAG-NEXT: s_mov_b32 s4, 0x32a5705f +; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1 +; SI-SDAG-NEXT: v_add_f32_e32 v1, v3, v1 +; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1 +; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2 +; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0 +; SI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 +; SI-SDAG-NEXT: s_mov_b32 s4, 0x42b17218 +; SI-SDAG-NEXT: v_ldexp_f32_e32 v1, v1, v2 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc -; SI-SDAG-NEXT: v_add_f32_e32 v0, v0, v1 -; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0 -; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x1f800000 -; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; SI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 +; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000 +; SI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc ; SI-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; SI-GISEL-LABEL: v_exp_f32_nnan: ; SI-GISEL: ; %bb.0: ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 -; SI-GISEL-NEXT: 
v_mov_b32_e32 v1, 0xc2fc0000 -; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000 -; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 -; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc -; SI-GISEL-NEXT: v_add_f32_e32 v0, v0, v1 -; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0 -; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x1f800000 -; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; SI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 +; SI-GISEL-NEXT: s_mov_b32 s4, 0x3fb8aa3b +; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0 +; SI-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1 +; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f +; SI-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2 +; SI-GISEL-NEXT: v_rndne_f32_e32 v3, v1 +; SI-GISEL-NEXT: v_sub_f32_e32 v1, v1, v3 +; SI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 +; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3 +; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1 +; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 +; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2 +; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 +; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 +; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218 +; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc +; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc ; SI-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_exp_f32_nnan: @@ -1809,19 +4613,147 @@ } define float @v_exp_f32_nnan_daz(float %in) #0 { -; GCN-LABEL: v_exp_f32_nnan_daz: -; GCN: ; %bb.0: -; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 -; GCN-NEXT: v_exp_f32_e32 v0, v0 -; GCN-NEXT: s_setpc_b64 s[30:31] +; VI-SDAG-LABEL: v_exp_f32_nnan_daz: +; VI-SDAG: ; %bb.0: +; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 +; VI-SDAG-NEXT: v_sub_f32_e32 v4, v0, v1 +; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v1 +; VI-SDAG-NEXT: v_mul_f32_e32 v5, 0x39a3b295, v4 +; VI-SDAG-NEXT: s_mov_b32 s4, 0x3fb8a000 +; VI-SDAG-NEXT: v_rndne_f32_e32 v2, v2 +; VI-SDAG-NEXT: 
v_mac_f32_e32 v5, 0x3fb8a000, v4 +; VI-SDAG-NEXT: v_mad_f32 v3, v1, s4, -v2 +; VI-SDAG-NEXT: v_mac_f32_e32 v5, 0x39a3b295, v1 +; VI-SDAG-NEXT: v_add_f32_e32 v1, v3, v5 +; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1 +; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2 +; VI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0 +; VI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 +; VI-SDAG-NEXT: s_mov_b32 s4, 0x42b17218 +; VI-SDAG-NEXT: v_ldexp_f32 v1, v1, v2 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000 +; VI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc +; VI-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; VI-GISEL-LABEL: v_exp_f32_nnan_daz: +; VI-GISEL: ; %bb.0: +; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 +; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1 +; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3fb8a000, v1 +; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x39a3b295, v2 +; VI-GISEL-NEXT: s_mov_b32 s4, 0x3fb8a000 +; VI-GISEL-NEXT: v_mac_f32_e32 v4, 0x3fb8a000, v2 +; VI-GISEL-NEXT: v_rndne_f32_e32 v2, v3 +; VI-GISEL-NEXT: v_mac_f32_e32 v4, 0x39a3b295, v1 +; VI-GISEL-NEXT: v_mad_f32 v1, v1, s4, -v2 +; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v4 +; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v2 +; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1 +; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 +; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 +; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 +; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 +; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218 +; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc +; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc +; VI-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-SDAG-LABEL: v_exp_f32_nnan_daz: +; GFX900-SDAG: ; %bb.0: +; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b +; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0 +; GFX900-SDAG-NEXT: 
v_fma_f32 v2, v0, s4, -v1 +; GFX900-SDAG-NEXT: s_mov_b32 s5, 0x32a5705f +; GFX900-SDAG-NEXT: v_rndne_f32_e32 v1, v1 +; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s5, v2 +; GFX900-SDAG-NEXT: v_mad_f32 v3, v0, s4, -v1 +; GFX900-SDAG-NEXT: v_add_f32_e32 v2, v3, v2 +; GFX900-SDAG-NEXT: v_exp_f32_e32 v2, v2 +; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v1, v1 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0 +; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x42b17218 +; GFX900-SDAG-NEXT: v_ldexp_f32 v1, v2, v1 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000 +; GFX900-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc +; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] ; -; SI-LABEL: v_exp_f32_nnan_daz: -; SI: ; %bb.0: -; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 -; SI-NEXT: v_exp_f32_e32 v0, v0 -; SI-NEXT: s_setpc_b64 s[30:31] +; GFX900-GISEL-LABEL: v_exp_f32_nnan_daz: +; GFX900-GISEL: ; %bb.0: +; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-GISEL-NEXT: s_mov_b32 s4, 0x3fb8aa3b +; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0 +; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f +; GFX900-GISEL-NEXT: v_rndne_f32_e32 v1, v1 +; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2 +; GFX900-GISEL-NEXT: v_mad_f32 v3, v0, s4, -v1 +; GFX900-GISEL-NEXT: v_add_f32_e32 v2, v3, v2 +; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v1, v1 +; GFX900-GISEL-NEXT: v_exp_f32_e32 v2, v2 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 +; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v2, v1 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 +; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218 +; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc +; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 
v0, v1, v3, vcc +; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; SI-SDAG-LABEL: v_exp_f32_nnan_daz: +; SI-SDAG: ; %bb.0: +; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b +; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0 +; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1 +; SI-SDAG-NEXT: s_mov_b32 s5, 0x32a5705f +; SI-SDAG-NEXT: v_rndne_f32_e32 v1, v1 +; SI-SDAG-NEXT: v_fma_f32 v2, v0, s5, v2 +; SI-SDAG-NEXT: v_mad_f32 v3, v0, s4, -v1 +; SI-SDAG-NEXT: v_add_f32_e32 v2, v3, v2 +; SI-SDAG-NEXT: v_exp_f32_e32 v2, v2 +; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v1, v1 +; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0 +; SI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 +; SI-SDAG-NEXT: s_mov_b32 s4, 0x42b17218 +; SI-SDAG-NEXT: v_ldexp_f32_e32 v1, v2, v1 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000 +; SI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc +; SI-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; SI-GISEL-LABEL: v_exp_f32_nnan_daz: +; SI-GISEL: ; %bb.0: +; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-GISEL-NEXT: s_mov_b32 s4, 0x3fb8aa3b +; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0 +; SI-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1 +; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f +; SI-GISEL-NEXT: v_rndne_f32_e32 v1, v1 +; SI-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2 +; SI-GISEL-NEXT: v_mad_f32 v3, v0, s4, -v1 +; SI-GISEL-NEXT: v_add_f32_e32 v2, v3, v2 +; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v1, v1 +; SI-GISEL-NEXT: v_exp_f32_e32 v2, v2 +; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 +; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v2, v1 +; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 +; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 +; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218 +; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc +; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc +; SI-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; 
R600-LABEL: v_exp_f32_nnan_daz: ; R600: ; %bb.0: @@ -1837,64 +4769,146 @@ } define float @v_exp_f32_nnan_dynamic(float %in) #1 { -; GCN-SDAG-LABEL: v_exp_f32_nnan_dynamic: -; GCN-SDAG: ; %bb.0: -; GCN-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0 -; GCN-SDAG-NEXT: s_mov_b32 s4, 0xc2fc0000 -; GCN-SDAG-NEXT: v_mov_b32_e32 v2, 0x1f800000 -; GCN-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v1 -; GCN-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc -; GCN-SDAG-NEXT: v_mov_b32_e32 v2, 0x42800000 -; GCN-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc -; GCN-SDAG-NEXT: v_mac_f32_e32 v2, 0x3fb8aa3b, v0 -; GCN-SDAG-NEXT: v_exp_f32_e32 v0, v2 -; GCN-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 -; GCN-SDAG-NEXT: s_setpc_b64 s[30:31] +; VI-SDAG-LABEL: v_exp_f32_nnan_dynamic: +; VI-SDAG: ; %bb.0: +; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 +; VI-SDAG-NEXT: v_sub_f32_e32 v4, v0, v1 +; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v1 +; VI-SDAG-NEXT: v_mul_f32_e32 v5, 0x39a3b295, v4 +; VI-SDAG-NEXT: s_mov_b32 s4, 0x3fb8a000 +; VI-SDAG-NEXT: v_rndne_f32_e32 v2, v2 +; VI-SDAG-NEXT: v_mac_f32_e32 v5, 0x3fb8a000, v4 +; VI-SDAG-NEXT: v_mad_f32 v3, v1, s4, -v2 +; VI-SDAG-NEXT: v_mac_f32_e32 v5, 0x39a3b295, v1 +; VI-SDAG-NEXT: v_add_f32_e32 v1, v3, v5 +; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1 +; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2 +; VI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0 +; VI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 +; VI-SDAG-NEXT: s_mov_b32 s4, 0x42b17218 +; VI-SDAG-NEXT: v_ldexp_f32 v1, v1, v2 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000 +; VI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc +; VI-SDAG-NEXT: s_setpc_b64 s[30:31] ; -; GCN-GISEL-LABEL: v_exp_f32_nnan_dynamic: -; GCN-GISEL: ; %bb.0: -; GCN-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-GISEL-NEXT: v_mul_f32_e32 v1, 
0x3fb8aa3b, v0 -; GCN-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2fc0000 -; GCN-GISEL-NEXT: v_mov_b32_e32 v3, 0x42800000 -; GCN-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v1, v2 -; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v3, vcc -; GCN-GISEL-NEXT: v_mac_f32_e32 v1, 0x3fb8aa3b, v0 -; GCN-GISEL-NEXT: v_exp_f32_e32 v0, v1 -; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0x1f800000 -; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; GCN-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 -; GCN-GISEL-NEXT: s_setpc_b64 s[30:31] +; VI-GISEL-LABEL: v_exp_f32_nnan_dynamic: +; VI-GISEL: ; %bb.0: +; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 +; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1 +; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3fb8a000, v1 +; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x39a3b295, v2 +; VI-GISEL-NEXT: s_mov_b32 s4, 0x3fb8a000 +; VI-GISEL-NEXT: v_mac_f32_e32 v4, 0x3fb8a000, v2 +; VI-GISEL-NEXT: v_rndne_f32_e32 v2, v3 +; VI-GISEL-NEXT: v_mac_f32_e32 v4, 0x39a3b295, v1 +; VI-GISEL-NEXT: v_mad_f32 v1, v1, s4, -v2 +; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v4 +; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v2 +; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1 +; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 +; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 +; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 +; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 +; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218 +; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc +; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc +; VI-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-SDAG-LABEL: v_exp_f32_nnan_dynamic: +; GFX900-SDAG: ; %bb.0: +; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b +; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0 +; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1 +; GFX900-SDAG-NEXT: s_mov_b32 s5, 0x32a5705f +; GFX900-SDAG-NEXT: v_rndne_f32_e32 v1, v1 +; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s5, v2 
+; GFX900-SDAG-NEXT: v_mad_f32 v3, v0, s4, -v1 +; GFX900-SDAG-NEXT: v_add_f32_e32 v2, v3, v2 +; GFX900-SDAG-NEXT: v_exp_f32_e32 v2, v2 +; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v1, v1 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0 +; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x42b17218 +; GFX900-SDAG-NEXT: v_ldexp_f32 v1, v2, v1 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000 +; GFX900-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc +; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-GISEL-LABEL: v_exp_f32_nnan_dynamic: +; GFX900-GISEL: ; %bb.0: +; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-GISEL-NEXT: s_mov_b32 s4, 0x3fb8aa3b +; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0 +; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f +; GFX900-GISEL-NEXT: v_rndne_f32_e32 v1, v1 +; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2 +; GFX900-GISEL-NEXT: v_mad_f32 v3, v0, s4, -v1 +; GFX900-GISEL-NEXT: v_add_f32_e32 v2, v3, v2 +; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v1, v1 +; GFX900-GISEL-NEXT: v_exp_f32_e32 v2, v2 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 +; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v2, v1 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 +; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218 +; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc +; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc +; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; SI-SDAG-LABEL: v_exp_f32_nnan_dynamic: ; SI-SDAG: ; %bb.0: ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b ; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0 -; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2fc0000 -; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x1f800000 -; 
SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v1 -; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc -; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x42800000 -; SI-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc -; SI-SDAG-NEXT: v_mac_f32_e32 v2, 0x3fb8aa3b, v0 -; SI-SDAG-NEXT: v_exp_f32_e32 v0, v2 -; SI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 +; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1 +; SI-SDAG-NEXT: s_mov_b32 s5, 0x32a5705f +; SI-SDAG-NEXT: v_rndne_f32_e32 v1, v1 +; SI-SDAG-NEXT: v_fma_f32 v2, v0, s5, v2 +; SI-SDAG-NEXT: v_mad_f32 v3, v0, s4, -v1 +; SI-SDAG-NEXT: v_add_f32_e32 v2, v3, v2 +; SI-SDAG-NEXT: v_exp_f32_e32 v2, v2 +; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v1, v1 +; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0 +; SI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 +; SI-SDAG-NEXT: s_mov_b32 s4, 0x42b17218 +; SI-SDAG-NEXT: v_ldexp_f32_e32 v1, v2, v1 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000 +; SI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc ; SI-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; SI-GISEL-LABEL: v_exp_f32_nnan_dynamic: ; SI-GISEL: ; %bb.0: ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-GISEL-NEXT: s_mov_b32 s4, 0x3fb8aa3b ; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0 -; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2fc0000 -; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x42800000 -; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v1, v2 -; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v3, vcc -; SI-GISEL-NEXT: v_mac_f32_e32 v1, 0x3fb8aa3b, v0 -; SI-GISEL-NEXT: v_exp_f32_e32 v0, v1 -; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x1f800000 -; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; SI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 +; SI-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1 +; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f +; SI-GISEL-NEXT: v_rndne_f32_e32 v1, v1 +; SI-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2 +; SI-GISEL-NEXT: v_mad_f32 v3, v0, s4, -v1 +; SI-GISEL-NEXT: v_add_f32_e32 v2, v3, v2 +; SI-GISEL-NEXT: v_cvt_i32_f32_e32 
v1, v1 +; SI-GISEL-NEXT: v_exp_f32_e32 v2, v2 +; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 +; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v2, v1 +; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 +; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 +; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218 +; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc +; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc ; SI-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_exp_f32_nnan_dynamic: @@ -1911,19 +4925,123 @@ } define float @v_exp_f32_ninf_daz(float %in) #0 { -; GCN-LABEL: v_exp_f32_ninf_daz: -; GCN: ; %bb.0: -; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 -; GCN-NEXT: v_exp_f32_e32 v0, v0 -; GCN-NEXT: s_setpc_b64 s[30:31] +; VI-SDAG-LABEL: v_exp_f32_ninf_daz: +; VI-SDAG: ; %bb.0: +; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 +; VI-SDAG-NEXT: v_sub_f32_e32 v4, v0, v1 +; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v1 +; VI-SDAG-NEXT: v_mul_f32_e32 v5, 0x39a3b295, v4 +; VI-SDAG-NEXT: s_mov_b32 s4, 0x3fb8a000 +; VI-SDAG-NEXT: v_rndne_f32_e32 v2, v2 +; VI-SDAG-NEXT: v_mac_f32_e32 v5, 0x3fb8a000, v4 +; VI-SDAG-NEXT: v_mad_f32 v3, v1, s4, -v2 +; VI-SDAG-NEXT: v_mac_f32_e32 v5, 0x39a3b295, v1 +; VI-SDAG-NEXT: v_add_f32_e32 v1, v3, v5 +; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1 +; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2 +; VI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0 +; VI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 +; VI-SDAG-NEXT: v_ldexp_f32 v1, v1, v2 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc +; VI-SDAG-NEXT: s_setpc_b64 s[30:31] ; -; SI-LABEL: v_exp_f32_ninf_daz: -; SI: ; %bb.0: -; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 -; SI-NEXT: v_exp_f32_e32 v0, v0 -; SI-NEXT: s_setpc_b64 s[30:31] +; VI-GISEL-LABEL: v_exp_f32_ninf_daz: +; VI-GISEL: ; %bb.0: +; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) 
lgkmcnt(0) +; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 +; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1 +; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3fb8a000, v1 +; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x39a3b295, v2 +; VI-GISEL-NEXT: s_mov_b32 s4, 0x3fb8a000 +; VI-GISEL-NEXT: v_mac_f32_e32 v4, 0x3fb8a000, v2 +; VI-GISEL-NEXT: v_rndne_f32_e32 v2, v3 +; VI-GISEL-NEXT: v_mac_f32_e32 v4, 0x39a3b295, v1 +; VI-GISEL-NEXT: v_mad_f32 v1, v1, s4, -v2 +; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v4 +; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v2 +; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1 +; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 +; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 +; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 +; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc +; VI-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-SDAG-LABEL: v_exp_f32_ninf_daz: +; GFX900-SDAG: ; %bb.0: +; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b +; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0 +; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1 +; GFX900-SDAG-NEXT: s_mov_b32 s5, 0x32a5705f +; GFX900-SDAG-NEXT: v_rndne_f32_e32 v1, v1 +; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s5, v2 +; GFX900-SDAG-NEXT: v_mad_f32 v3, v0, s4, -v1 +; GFX900-SDAG-NEXT: v_add_f32_e32 v2, v3, v2 +; GFX900-SDAG-NEXT: v_exp_f32_e32 v2, v2 +; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v1, v1 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0 +; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 +; GFX900-SDAG-NEXT: v_ldexp_f32 v1, v2, v1 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc +; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-GISEL-LABEL: v_exp_f32_ninf_daz: +; GFX900-GISEL: ; %bb.0: +; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-GISEL-NEXT: s_mov_b32 s4, 0x3fb8aa3b +; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0 +; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f +; GFX900-GISEL-NEXT: v_rndne_f32_e32 v1, v1 
+; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2 +; GFX900-GISEL-NEXT: v_mad_f32 v3, v0, s4, -v1 +; GFX900-GISEL-NEXT: v_add_f32_e32 v2, v3, v2 +; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v1, v1 +; GFX900-GISEL-NEXT: v_exp_f32_e32 v2, v2 +; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v2, v1 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 +; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 +; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc +; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; SI-SDAG-LABEL: v_exp_f32_ninf_daz: +; SI-SDAG: ; %bb.0: +; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b +; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0 +; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1 +; SI-SDAG-NEXT: s_mov_b32 s5, 0x32a5705f +; SI-SDAG-NEXT: v_rndne_f32_e32 v1, v1 +; SI-SDAG-NEXT: v_fma_f32 v2, v0, s5, v2 +; SI-SDAG-NEXT: v_mad_f32 v3, v0, s4, -v1 +; SI-SDAG-NEXT: v_add_f32_e32 v2, v3, v2 +; SI-SDAG-NEXT: v_exp_f32_e32 v2, v2 +; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v1, v1 +; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0 +; SI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 +; SI-SDAG-NEXT: v_ldexp_f32_e32 v1, v2, v1 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc +; SI-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; SI-GISEL-LABEL: v_exp_f32_ninf_daz: +; SI-GISEL: ; %bb.0: +; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-GISEL-NEXT: s_mov_b32 s4, 0x3fb8aa3b +; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0 +; SI-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1 +; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f +; SI-GISEL-NEXT: v_rndne_f32_e32 v1, v1 +; SI-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2 +; SI-GISEL-NEXT: v_mad_f32 v3, v0, s4, -v1 +; SI-GISEL-NEXT: v_add_f32_e32 v2, v3, v2 +; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v1, v1 +; SI-GISEL-NEXT: v_exp_f32_e32 v2, v2 +; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v2, v1 +; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 +; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 +; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc +; 
SI-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_exp_f32_ninf_daz: ; R600: ; %bb.0: @@ -1939,64 +5057,122 @@ } define float @v_exp_f32_ninf_dynamic(float %in) #1 { -; GCN-SDAG-LABEL: v_exp_f32_ninf_dynamic: -; GCN-SDAG: ; %bb.0: -; GCN-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0 -; GCN-SDAG-NEXT: s_mov_b32 s4, 0xc2fc0000 -; GCN-SDAG-NEXT: v_mov_b32_e32 v2, 0x1f800000 -; GCN-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v1 -; GCN-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc -; GCN-SDAG-NEXT: v_mov_b32_e32 v2, 0x42800000 -; GCN-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc -; GCN-SDAG-NEXT: v_mac_f32_e32 v2, 0x3fb8aa3b, v0 -; GCN-SDAG-NEXT: v_exp_f32_e32 v0, v2 -; GCN-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 -; GCN-SDAG-NEXT: s_setpc_b64 s[30:31] +; VI-SDAG-LABEL: v_exp_f32_ninf_dynamic: +; VI-SDAG: ; %bb.0: +; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 +; VI-SDAG-NEXT: v_sub_f32_e32 v4, v0, v1 +; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v1 +; VI-SDAG-NEXT: v_mul_f32_e32 v5, 0x39a3b295, v4 +; VI-SDAG-NEXT: s_mov_b32 s4, 0x3fb8a000 +; VI-SDAG-NEXT: v_rndne_f32_e32 v2, v2 +; VI-SDAG-NEXT: v_mac_f32_e32 v5, 0x3fb8a000, v4 +; VI-SDAG-NEXT: v_mad_f32 v3, v1, s4, -v2 +; VI-SDAG-NEXT: v_mac_f32_e32 v5, 0x39a3b295, v1 +; VI-SDAG-NEXT: v_add_f32_e32 v1, v3, v5 +; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1 +; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2 +; VI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0 +; VI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 +; VI-SDAG-NEXT: v_ldexp_f32 v1, v1, v2 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc +; VI-SDAG-NEXT: s_setpc_b64 s[30:31] ; -; GCN-GISEL-LABEL: v_exp_f32_ninf_dynamic: -; GCN-GISEL: ; %bb.0: -; GCN-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0 -; GCN-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2fc0000 -; GCN-GISEL-NEXT: v_mov_b32_e32 v3, 0x42800000 -; GCN-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v1, 
v2 -; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v3, vcc -; GCN-GISEL-NEXT: v_mac_f32_e32 v1, 0x3fb8aa3b, v0 -; GCN-GISEL-NEXT: v_exp_f32_e32 v0, v1 -; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0x1f800000 -; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; GCN-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 -; GCN-GISEL-NEXT: s_setpc_b64 s[30:31] +; VI-GISEL-LABEL: v_exp_f32_ninf_dynamic: +; VI-GISEL: ; %bb.0: +; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 +; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1 +; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3fb8a000, v1 +; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x39a3b295, v2 +; VI-GISEL-NEXT: s_mov_b32 s4, 0x3fb8a000 +; VI-GISEL-NEXT: v_mac_f32_e32 v4, 0x3fb8a000, v2 +; VI-GISEL-NEXT: v_rndne_f32_e32 v2, v3 +; VI-GISEL-NEXT: v_mac_f32_e32 v4, 0x39a3b295, v1 +; VI-GISEL-NEXT: v_mad_f32 v1, v1, s4, -v2 +; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v4 +; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v2 +; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1 +; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 +; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 +; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 +; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc +; VI-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-SDAG-LABEL: v_exp_f32_ninf_dynamic: +; GFX900-SDAG: ; %bb.0: +; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b +; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0 +; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1 +; GFX900-SDAG-NEXT: s_mov_b32 s5, 0x32a5705f +; GFX900-SDAG-NEXT: v_rndne_f32_e32 v1, v1 +; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s5, v2 +; GFX900-SDAG-NEXT: v_mad_f32 v3, v0, s4, -v1 +; GFX900-SDAG-NEXT: v_add_f32_e32 v2, v3, v2 +; GFX900-SDAG-NEXT: v_exp_f32_e32 v2, v2 +; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v1, v1 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0 +; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 +; GFX900-SDAG-NEXT: v_ldexp_f32 v1, v2, v1 +; GFX900-SDAG-NEXT: 
v_cndmask_b32_e32 v0, 0, v1, vcc +; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-GISEL-LABEL: v_exp_f32_ninf_dynamic: +; GFX900-GISEL: ; %bb.0: +; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-GISEL-NEXT: s_mov_b32 s4, 0x3fb8aa3b +; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0 +; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f +; GFX900-GISEL-NEXT: v_rndne_f32_e32 v1, v1 +; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2 +; GFX900-GISEL-NEXT: v_mad_f32 v3, v0, s4, -v1 +; GFX900-GISEL-NEXT: v_add_f32_e32 v2, v3, v2 +; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v1, v1 +; GFX900-GISEL-NEXT: v_exp_f32_e32 v2, v2 +; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v2, v1 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 +; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 +; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc +; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; SI-SDAG-LABEL: v_exp_f32_ninf_dynamic: ; SI-SDAG: ; %bb.0: ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b ; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0 -; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2fc0000 -; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x1f800000 -; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v1 -; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc -; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x42800000 -; SI-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc -; SI-SDAG-NEXT: v_mac_f32_e32 v2, 0x3fb8aa3b, v0 -; SI-SDAG-NEXT: v_exp_f32_e32 v0, v2 -; SI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 +; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1 +; SI-SDAG-NEXT: s_mov_b32 s5, 0x32a5705f +; SI-SDAG-NEXT: v_rndne_f32_e32 v1, v1 +; SI-SDAG-NEXT: v_fma_f32 v2, v0, s5, v2 +; SI-SDAG-NEXT: v_mad_f32 v3, v0, s4, -v1 +; SI-SDAG-NEXT: v_add_f32_e32 v2, v3, v2 +; SI-SDAG-NEXT: v_exp_f32_e32 v2, v2 +; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v1, v1 +; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0 +; SI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 +; SI-SDAG-NEXT: 
v_ldexp_f32_e32 v1, v2, v1 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc ; SI-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; SI-GISEL-LABEL: v_exp_f32_ninf_dynamic: ; SI-GISEL: ; %bb.0: ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-GISEL-NEXT: s_mov_b32 s4, 0x3fb8aa3b ; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0 -; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2fc0000 -; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x42800000 -; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v1, v2 -; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v3, vcc -; SI-GISEL-NEXT: v_mac_f32_e32 v1, 0x3fb8aa3b, v0 -; SI-GISEL-NEXT: v_exp_f32_e32 v0, v1 -; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x1f800000 -; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; SI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 +; SI-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1 +; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f +; SI-GISEL-NEXT: v_rndne_f32_e32 v1, v1 +; SI-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2 +; SI-GISEL-NEXT: v_mad_f32 v3, v0, s4, -v1 +; SI-GISEL-NEXT: v_add_f32_e32 v2, v3, v2 +; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v1, v1 +; SI-GISEL-NEXT: v_exp_f32_e32 v2, v2 +; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v2, v1 +; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 +; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 +; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc ; SI-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_exp_f32_ninf_dynamic: @@ -2013,64 +5189,124 @@ } define float @v_exp_f32_nnan_ninf(float %in) { -; GCN-SDAG-LABEL: v_exp_f32_nnan_ninf: -; GCN-SDAG: ; %bb.0: -; GCN-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 -; GCN-SDAG-NEXT: s_mov_b32 s4, 0xc2fc0000 -; GCN-SDAG-NEXT: v_mov_b32_e32 v1, 0x42800000 -; GCN-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 -; GCN-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc -; GCN-SDAG-NEXT: v_add_f32_e32 v0, v0, v1 -; GCN-SDAG-NEXT: v_exp_f32_e32 v0, v0 -; GCN-SDAG-NEXT: v_mov_b32_e32 v1, 0x1f800000 -; GCN-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; GCN-SDAG-NEXT: 
v_mul_f32_e32 v0, v0, v1 -; GCN-SDAG-NEXT: s_setpc_b64 s[30:31] +; VI-SDAG-LABEL: v_exp_f32_nnan_ninf: +; VI-SDAG: ; %bb.0: +; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 +; VI-SDAG-NEXT: v_sub_f32_e32 v4, v0, v1 +; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v1 +; VI-SDAG-NEXT: v_mul_f32_e32 v5, 0x39a3b295, v4 +; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3fb8a000, v4 +; VI-SDAG-NEXT: v_rndne_f32_e32 v3, v2 +; VI-SDAG-NEXT: v_add_f32_e32 v4, v4, v5 +; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1 +; VI-SDAG-NEXT: v_sub_f32_e32 v2, v2, v3 +; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v4 +; VI-SDAG-NEXT: v_add_f32_e32 v1, v2, v1 +; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1 +; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v3 +; VI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0 +; VI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 +; VI-SDAG-NEXT: v_ldexp_f32 v1, v1, v2 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc +; VI-SDAG-NEXT: s_setpc_b64 s[30:31] ; -; GCN-GISEL-LABEL: v_exp_f32_nnan_ninf: -; GCN-GISEL: ; %bb.0: -; GCN-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 -; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000 -; GCN-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000 -; GCN-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 -; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc -; GCN-GISEL-NEXT: v_add_f32_e32 v0, v0, v1 -; GCN-GISEL-NEXT: v_exp_f32_e32 v0, v0 -; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0x1f800000 -; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; GCN-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 -; GCN-GISEL-NEXT: s_setpc_b64 s[30:31] +; VI-GISEL-LABEL: v_exp_f32_nnan_ninf: +; VI-GISEL: ; %bb.0: +; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 +; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1 +; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x39a3b295, v2 +; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v2 +; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3fb8a000, v1 +; 
VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v4 +; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1 +; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 +; VI-GISEL-NEXT: v_rndne_f32_e32 v2, v3 +; VI-GISEL-NEXT: v_sub_f32_e32 v3, v3, v2 +; VI-GISEL-NEXT: v_add_f32_e32 v1, v3, v1 +; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v2 +; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1 +; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 +; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 +; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 +; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc +; VI-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-SDAG-LABEL: v_exp_f32_nnan_ninf: +; GFX900-SDAG: ; %bb.0: +; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b +; GFX900-SDAG-NEXT: v_rndne_f32_e32 v2, v1 +; GFX900-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2 +; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x32a5705f +; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1 +; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v3, v1 +; GFX900-SDAG-NEXT: v_exp_f32_e32 v1, v1 +; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0 +; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 +; GFX900-SDAG-NEXT: v_ldexp_f32 v1, v1, v2 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc +; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-GISEL-LABEL: v_exp_f32_nnan_ninf: +; GFX900-GISEL: ; %bb.0: +; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-GISEL-NEXT: s_mov_b32 s4, 0x3fb8aa3b +; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0 +; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f +; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2 +; GFX900-GISEL-NEXT: v_rndne_f32_e32 v3, v1 +; GFX900-GISEL-NEXT: v_sub_f32_e32 v1, v1, v3 +; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 +; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3 +; GFX900-GISEL-NEXT: 
v_exp_f32_e32 v1, v1 +; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 +; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 +; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc +; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; SI-SDAG-LABEL: v_exp_f32_nnan_ninf: ; SI-SDAG: ; %bb.0: ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 -; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2fc0000 -; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x42800000 -; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 -; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc -; SI-SDAG-NEXT: v_add_f32_e32 v0, v0, v1 -; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0 -; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x1f800000 -; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; SI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 +; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0 +; SI-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b +; SI-SDAG-NEXT: v_rndne_f32_e32 v2, v1 +; SI-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2 +; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1 +; SI-SDAG-NEXT: s_mov_b32 s4, 0x32a5705f +; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1 +; SI-SDAG-NEXT: v_add_f32_e32 v1, v3, v1 +; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1 +; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2 +; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0 +; SI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 +; SI-SDAG-NEXT: v_ldexp_f32_e32 v1, v1, v2 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc ; SI-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; SI-GISEL-LABEL: v_exp_f32_nnan_ninf: ; SI-GISEL: ; %bb.0: ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 -; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000 -; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000 -; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 -; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc -; SI-GISEL-NEXT: v_add_f32_e32 v0, v0, v1 -; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0 -; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x1f800000 -; SI-GISEL-NEXT: 
v_cndmask_b32_e32 v1, 1.0, v1, vcc -; SI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 +; SI-GISEL-NEXT: s_mov_b32 s4, 0x3fb8aa3b +; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0 +; SI-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1 +; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f +; SI-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2 +; SI-GISEL-NEXT: v_rndne_f32_e32 v3, v1 +; SI-GISEL-NEXT: v_sub_f32_e32 v1, v1, v3 +; SI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 +; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3 +; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1 +; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2 +; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 +; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 +; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc ; SI-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_exp_f32_nnan_ninf: @@ -2087,19 +5323,123 @@ } define float @v_exp_f32_nnan_ninf_daz(float %in) #0 { -; GCN-LABEL: v_exp_f32_nnan_ninf_daz: -; GCN: ; %bb.0: -; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 -; GCN-NEXT: v_exp_f32_e32 v0, v0 -; GCN-NEXT: s_setpc_b64 s[30:31] +; VI-SDAG-LABEL: v_exp_f32_nnan_ninf_daz: +; VI-SDAG: ; %bb.0: +; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 +; VI-SDAG-NEXT: v_sub_f32_e32 v4, v0, v1 +; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v1 +; VI-SDAG-NEXT: v_mul_f32_e32 v5, 0x39a3b295, v4 +; VI-SDAG-NEXT: s_mov_b32 s4, 0x3fb8a000 +; VI-SDAG-NEXT: v_rndne_f32_e32 v2, v2 +; VI-SDAG-NEXT: v_mac_f32_e32 v5, 0x3fb8a000, v4 +; VI-SDAG-NEXT: v_mad_f32 v3, v1, s4, -v2 +; VI-SDAG-NEXT: v_mac_f32_e32 v5, 0x39a3b295, v1 +; VI-SDAG-NEXT: v_add_f32_e32 v1, v3, v5 +; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1 +; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2 +; VI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0 +; VI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 +; VI-SDAG-NEXT: v_ldexp_f32 v1, v1, v2 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc +; VI-SDAG-NEXT: s_setpc_b64 s[30:31] ; -; SI-LABEL: 
v_exp_f32_nnan_ninf_daz: -; SI: ; %bb.0: -; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 -; SI-NEXT: v_exp_f32_e32 v0, v0 -; SI-NEXT: s_setpc_b64 s[30:31] +; VI-GISEL-LABEL: v_exp_f32_nnan_ninf_daz: +; VI-GISEL: ; %bb.0: +; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 +; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1 +; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3fb8a000, v1 +; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x39a3b295, v2 +; VI-GISEL-NEXT: s_mov_b32 s4, 0x3fb8a000 +; VI-GISEL-NEXT: v_mac_f32_e32 v4, 0x3fb8a000, v2 +; VI-GISEL-NEXT: v_rndne_f32_e32 v2, v3 +; VI-GISEL-NEXT: v_mac_f32_e32 v4, 0x39a3b295, v1 +; VI-GISEL-NEXT: v_mad_f32 v1, v1, s4, -v2 +; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v4 +; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v2 +; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1 +; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 +; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 +; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 +; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc +; VI-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-SDAG-LABEL: v_exp_f32_nnan_ninf_daz: +; GFX900-SDAG: ; %bb.0: +; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b +; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0 +; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1 +; GFX900-SDAG-NEXT: s_mov_b32 s5, 0x32a5705f +; GFX900-SDAG-NEXT: v_rndne_f32_e32 v1, v1 +; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s5, v2 +; GFX900-SDAG-NEXT: v_mad_f32 v3, v0, s4, -v1 +; GFX900-SDAG-NEXT: v_add_f32_e32 v2, v3, v2 +; GFX900-SDAG-NEXT: v_exp_f32_e32 v2, v2 +; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v1, v1 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0 +; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 +; GFX900-SDAG-NEXT: v_ldexp_f32 v1, v2, v1 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc +; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-GISEL-LABEL: v_exp_f32_nnan_ninf_daz: +; 
GFX900-GISEL: ; %bb.0: +; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-GISEL-NEXT: s_mov_b32 s4, 0x3fb8aa3b +; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0 +; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f +; GFX900-GISEL-NEXT: v_rndne_f32_e32 v1, v1 +; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2 +; GFX900-GISEL-NEXT: v_mad_f32 v3, v0, s4, -v1 +; GFX900-GISEL-NEXT: v_add_f32_e32 v2, v3, v2 +; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v1, v1 +; GFX900-GISEL-NEXT: v_exp_f32_e32 v2, v2 +; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v2, v1 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 +; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 +; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc +; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; SI-SDAG-LABEL: v_exp_f32_nnan_ninf_daz: +; SI-SDAG: ; %bb.0: +; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b +; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0 +; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1 +; SI-SDAG-NEXT: s_mov_b32 s5, 0x32a5705f +; SI-SDAG-NEXT: v_rndne_f32_e32 v1, v1 +; SI-SDAG-NEXT: v_fma_f32 v2, v0, s5, v2 +; SI-SDAG-NEXT: v_mad_f32 v3, v0, s4, -v1 +; SI-SDAG-NEXT: v_add_f32_e32 v2, v3, v2 +; SI-SDAG-NEXT: v_exp_f32_e32 v2, v2 +; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v1, v1 +; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0 +; SI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 +; SI-SDAG-NEXT: v_ldexp_f32_e32 v1, v2, v1 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc +; SI-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; SI-GISEL-LABEL: v_exp_f32_nnan_ninf_daz: +; SI-GISEL: ; %bb.0: +; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-GISEL-NEXT: s_mov_b32 s4, 0x3fb8aa3b +; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0 +; SI-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1 +; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f +; SI-GISEL-NEXT: v_rndne_f32_e32 v1, v1 +; SI-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2 +; SI-GISEL-NEXT: 
v_mad_f32 v3, v0, s4, -v1 +; SI-GISEL-NEXT: v_add_f32_e32 v2, v3, v2 +; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v1, v1 +; SI-GISEL-NEXT: v_exp_f32_e32 v2, v2 +; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v2, v1 +; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 +; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 +; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc +; SI-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_exp_f32_nnan_ninf_daz: ; R600: ; %bb.0: @@ -2115,64 +5455,122 @@ } define float @v_exp_f32_nnan_ninf_dynamic(float %in) #1 { -; GCN-SDAG-LABEL: v_exp_f32_nnan_ninf_dynamic: -; GCN-SDAG: ; %bb.0: -; GCN-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0 -; GCN-SDAG-NEXT: s_mov_b32 s4, 0xc2fc0000 -; GCN-SDAG-NEXT: v_mov_b32_e32 v2, 0x1f800000 -; GCN-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v1 -; GCN-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc -; GCN-SDAG-NEXT: v_mov_b32_e32 v2, 0x42800000 -; GCN-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc -; GCN-SDAG-NEXT: v_mac_f32_e32 v2, 0x3fb8aa3b, v0 -; GCN-SDAG-NEXT: v_exp_f32_e32 v0, v2 -; GCN-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 -; GCN-SDAG-NEXT: s_setpc_b64 s[30:31] +; VI-SDAG-LABEL: v_exp_f32_nnan_ninf_dynamic: +; VI-SDAG: ; %bb.0: +; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 +; VI-SDAG-NEXT: v_sub_f32_e32 v4, v0, v1 +; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v1 +; VI-SDAG-NEXT: v_mul_f32_e32 v5, 0x39a3b295, v4 +; VI-SDAG-NEXT: s_mov_b32 s4, 0x3fb8a000 +; VI-SDAG-NEXT: v_rndne_f32_e32 v2, v2 +; VI-SDAG-NEXT: v_mac_f32_e32 v5, 0x3fb8a000, v4 +; VI-SDAG-NEXT: v_mad_f32 v3, v1, s4, -v2 +; VI-SDAG-NEXT: v_mac_f32_e32 v5, 0x39a3b295, v1 +; VI-SDAG-NEXT: v_add_f32_e32 v1, v3, v5 +; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1 +; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2 +; VI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0 +; VI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 +; VI-SDAG-NEXT: v_ldexp_f32 v1, v1, v2 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, 
v1, vcc +; VI-SDAG-NEXT: s_setpc_b64 s[30:31] ; -; GCN-GISEL-LABEL: v_exp_f32_nnan_ninf_dynamic: -; GCN-GISEL: ; %bb.0: -; GCN-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0 -; GCN-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2fc0000 -; GCN-GISEL-NEXT: v_mov_b32_e32 v3, 0x42800000 -; GCN-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v1, v2 -; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v3, vcc -; GCN-GISEL-NEXT: v_mac_f32_e32 v1, 0x3fb8aa3b, v0 -; GCN-GISEL-NEXT: v_exp_f32_e32 v0, v1 -; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0x1f800000 -; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; GCN-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 -; GCN-GISEL-NEXT: s_setpc_b64 s[30:31] +; VI-GISEL-LABEL: v_exp_f32_nnan_ninf_dynamic: +; VI-GISEL: ; %bb.0: +; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 +; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1 +; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3fb8a000, v1 +; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x39a3b295, v2 +; VI-GISEL-NEXT: s_mov_b32 s4, 0x3fb8a000 +; VI-GISEL-NEXT: v_mac_f32_e32 v4, 0x3fb8a000, v2 +; VI-GISEL-NEXT: v_rndne_f32_e32 v2, v3 +; VI-GISEL-NEXT: v_mac_f32_e32 v4, 0x39a3b295, v1 +; VI-GISEL-NEXT: v_mad_f32 v1, v1, s4, -v2 +; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v4 +; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v2 +; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1 +; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 +; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 +; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 +; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc +; VI-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-SDAG-LABEL: v_exp_f32_nnan_ninf_dynamic: +; GFX900-SDAG: ; %bb.0: +; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b +; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0 +; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1 +; GFX900-SDAG-NEXT: s_mov_b32 s5, 0x32a5705f +; GFX900-SDAG-NEXT: v_rndne_f32_e32 v1, v1 +; 
GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s5, v2 +; GFX900-SDAG-NEXT: v_mad_f32 v3, v0, s4, -v1 +; GFX900-SDAG-NEXT: v_add_f32_e32 v2, v3, v2 +; GFX900-SDAG-NEXT: v_exp_f32_e32 v2, v2 +; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v1, v1 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0 +; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 +; GFX900-SDAG-NEXT: v_ldexp_f32 v1, v2, v1 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc +; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-GISEL-LABEL: v_exp_f32_nnan_ninf_dynamic: +; GFX900-GISEL: ; %bb.0: +; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-GISEL-NEXT: s_mov_b32 s4, 0x3fb8aa3b +; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0 +; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f +; GFX900-GISEL-NEXT: v_rndne_f32_e32 v1, v1 +; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2 +; GFX900-GISEL-NEXT: v_mad_f32 v3, v0, s4, -v1 +; GFX900-GISEL-NEXT: v_add_f32_e32 v2, v3, v2 +; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v1, v1 +; GFX900-GISEL-NEXT: v_exp_f32_e32 v2, v2 +; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v2, v1 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 +; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 +; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc +; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; SI-SDAG-LABEL: v_exp_f32_nnan_ninf_dynamic: ; SI-SDAG: ; %bb.0: ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b ; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0 -; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2fc0000 -; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x1f800000 -; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v1 -; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc -; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x42800000 -; SI-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc -; SI-SDAG-NEXT: v_mac_f32_e32 v2, 0x3fb8aa3b, v0 -; SI-SDAG-NEXT: v_exp_f32_e32 v0, v2 -; SI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 +; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1 +; 
SI-SDAG-NEXT: s_mov_b32 s5, 0x32a5705f +; SI-SDAG-NEXT: v_rndne_f32_e32 v1, v1 +; SI-SDAG-NEXT: v_fma_f32 v2, v0, s5, v2 +; SI-SDAG-NEXT: v_mad_f32 v3, v0, s4, -v1 +; SI-SDAG-NEXT: v_add_f32_e32 v2, v3, v2 +; SI-SDAG-NEXT: v_exp_f32_e32 v2, v2 +; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v1, v1 +; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0 +; SI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 +; SI-SDAG-NEXT: v_ldexp_f32_e32 v1, v2, v1 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc ; SI-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; SI-GISEL-LABEL: v_exp_f32_nnan_ninf_dynamic: ; SI-GISEL: ; %bb.0: ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-GISEL-NEXT: s_mov_b32 s4, 0x3fb8aa3b ; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0 -; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2fc0000 -; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x42800000 -; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v1, v2 -; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v3, vcc -; SI-GISEL-NEXT: v_mac_f32_e32 v1, 0x3fb8aa3b, v0 -; SI-GISEL-NEXT: v_exp_f32_e32 v0, v1 -; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x1f800000 -; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; SI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 +; SI-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1 +; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f +; SI-GISEL-NEXT: v_rndne_f32_e32 v1, v1 +; SI-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2 +; SI-GISEL-NEXT: v_mad_f32 v3, v0, s4, -v1 +; SI-GISEL-NEXT: v_add_f32_e32 v2, v3, v2 +; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v1, v1 +; SI-GISEL-NEXT: v_exp_f32_e32 v2, v2 +; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v2, v1 +; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 +; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 +; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc ; SI-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_exp_f32_nnan_ninf_dynamic: @@ -2217,64 +5615,146 @@ } define float @v_exp_f32_dynamic_mode(float %in) #1 { -; GCN-SDAG-LABEL: v_exp_f32_dynamic_mode: -; GCN-SDAG: ; %bb.0: -; GCN-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-SDAG-NEXT: 
v_mul_f32_e32 v1, 0x3fb8aa3b, v0 -; GCN-SDAG-NEXT: s_mov_b32 s4, 0xc2fc0000 -; GCN-SDAG-NEXT: v_mov_b32_e32 v2, 0x1f800000 -; GCN-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v1 -; GCN-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc -; GCN-SDAG-NEXT: v_mov_b32_e32 v2, 0x42800000 -; GCN-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc -; GCN-SDAG-NEXT: v_mac_f32_e32 v2, 0x3fb8aa3b, v0 -; GCN-SDAG-NEXT: v_exp_f32_e32 v0, v2 -; GCN-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 -; GCN-SDAG-NEXT: s_setpc_b64 s[30:31] +; VI-SDAG-LABEL: v_exp_f32_dynamic_mode: +; VI-SDAG: ; %bb.0: +; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 +; VI-SDAG-NEXT: v_sub_f32_e32 v4, v0, v1 +; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v1 +; VI-SDAG-NEXT: v_mul_f32_e32 v5, 0x39a3b295, v4 +; VI-SDAG-NEXT: s_mov_b32 s4, 0x3fb8a000 +; VI-SDAG-NEXT: v_rndne_f32_e32 v2, v2 +; VI-SDAG-NEXT: v_mac_f32_e32 v5, 0x3fb8a000, v4 +; VI-SDAG-NEXT: v_mad_f32 v3, v1, s4, -v2 +; VI-SDAG-NEXT: v_mac_f32_e32 v5, 0x39a3b295, v1 +; VI-SDAG-NEXT: v_add_f32_e32 v1, v3, v5 +; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1 +; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2 +; VI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0 +; VI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 +; VI-SDAG-NEXT: s_mov_b32 s4, 0x42b17218 +; VI-SDAG-NEXT: v_ldexp_f32 v1, v1, v2 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000 +; VI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc +; VI-SDAG-NEXT: s_setpc_b64 s[30:31] ; -; GCN-GISEL-LABEL: v_exp_f32_dynamic_mode: -; GCN-GISEL: ; %bb.0: -; GCN-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0 -; GCN-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2fc0000 -; GCN-GISEL-NEXT: v_mov_b32_e32 v3, 0x42800000 -; GCN-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v1, v2 -; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v3, vcc -; GCN-GISEL-NEXT: v_mac_f32_e32 v1, 0x3fb8aa3b, v0 -; 
GCN-GISEL-NEXT: v_exp_f32_e32 v0, v1 -; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0x1f800000 -; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; GCN-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 -; GCN-GISEL-NEXT: s_setpc_b64 s[30:31] +; VI-GISEL-LABEL: v_exp_f32_dynamic_mode: +; VI-GISEL: ; %bb.0: +; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 +; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1 +; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3fb8a000, v1 +; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x39a3b295, v2 +; VI-GISEL-NEXT: s_mov_b32 s4, 0x3fb8a000 +; VI-GISEL-NEXT: v_mac_f32_e32 v4, 0x3fb8a000, v2 +; VI-GISEL-NEXT: v_rndne_f32_e32 v2, v3 +; VI-GISEL-NEXT: v_mac_f32_e32 v4, 0x39a3b295, v1 +; VI-GISEL-NEXT: v_mad_f32 v1, v1, s4, -v2 +; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v4 +; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v2 +; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1 +; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 +; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 +; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 +; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 +; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218 +; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc +; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc +; VI-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-SDAG-LABEL: v_exp_f32_dynamic_mode: +; GFX900-SDAG: ; %bb.0: +; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b +; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0 +; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1 +; GFX900-SDAG-NEXT: s_mov_b32 s5, 0x32a5705f +; GFX900-SDAG-NEXT: v_rndne_f32_e32 v1, v1 +; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s5, v2 +; GFX900-SDAG-NEXT: v_mad_f32 v3, v0, s4, -v1 +; GFX900-SDAG-NEXT: v_add_f32_e32 v2, v3, v2 +; GFX900-SDAG-NEXT: v_exp_f32_e32 v2, v2 +; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v1, v1 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0 +; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e32 
vcc, s4, v0 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x42b17218 +; GFX900-SDAG-NEXT: v_ldexp_f32 v1, v2, v1 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000 +; GFX900-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc +; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-GISEL-LABEL: v_exp_f32_dynamic_mode: +; GFX900-GISEL: ; %bb.0: +; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-GISEL-NEXT: s_mov_b32 s4, 0x3fb8aa3b +; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0 +; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f +; GFX900-GISEL-NEXT: v_rndne_f32_e32 v1, v1 +; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2 +; GFX900-GISEL-NEXT: v_mad_f32 v3, v0, s4, -v1 +; GFX900-GISEL-NEXT: v_add_f32_e32 v2, v3, v2 +; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v1, v1 +; GFX900-GISEL-NEXT: v_exp_f32_e32 v2, v2 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 +; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v2, v1 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 +; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218 +; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc +; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc +; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; SI-SDAG-LABEL: v_exp_f32_dynamic_mode: ; SI-SDAG: ; %bb.0: ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b ; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0 -; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2fc0000 -; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x1f800000 -; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v1 -; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc -; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x42800000 -; SI-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc -; SI-SDAG-NEXT: v_mac_f32_e32 v2, 0x3fb8aa3b, v0 -; SI-SDAG-NEXT: v_exp_f32_e32 
v0, v2 -; SI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 +; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1 +; SI-SDAG-NEXT: s_mov_b32 s5, 0x32a5705f +; SI-SDAG-NEXT: v_rndne_f32_e32 v1, v1 +; SI-SDAG-NEXT: v_fma_f32 v2, v0, s5, v2 +; SI-SDAG-NEXT: v_mad_f32 v3, v0, s4, -v1 +; SI-SDAG-NEXT: v_add_f32_e32 v2, v3, v2 +; SI-SDAG-NEXT: v_exp_f32_e32 v2, v2 +; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v1, v1 +; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0 +; SI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 +; SI-SDAG-NEXT: s_mov_b32 s4, 0x42b17218 +; SI-SDAG-NEXT: v_ldexp_f32_e32 v1, v2, v1 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000 +; SI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc ; SI-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; SI-GISEL-LABEL: v_exp_f32_dynamic_mode: ; SI-GISEL: ; %bb.0: ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-GISEL-NEXT: s_mov_b32 s4, 0x3fb8aa3b ; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0 -; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2fc0000 -; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x42800000 -; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v1, v2 -; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v3, vcc -; SI-GISEL-NEXT: v_mac_f32_e32 v1, 0x3fb8aa3b, v0 -; SI-GISEL-NEXT: v_exp_f32_e32 v0, v1 -; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x1f800000 -; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; SI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 +; SI-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1 +; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f +; SI-GISEL-NEXT: v_rndne_f32_e32 v1, v1 +; SI-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2 +; SI-GISEL-NEXT: v_mad_f32 v3, v0, s4, -v1 +; SI-GISEL-NEXT: v_add_f32_e32 v2, v3, v2 +; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v1, v1 +; SI-GISEL-NEXT: v_exp_f32_e32 v2, v2 +; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 +; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v2, v1 +; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 +; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 +; SI-GISEL-NEXT: v_mov_b32_e32 v2, 
0x42b17218 +; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc +; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc ; SI-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_exp_f32_dynamic_mode: @@ -2291,48 +5771,120 @@ } define float @v_exp_f32_undef() { -; GCN-SDAG-LABEL: v_exp_f32_undef: -; GCN-SDAG: ; %bb.0: -; GCN-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-SDAG-NEXT: v_exp_f32_e32 v0, 0x7fc00000 -; GCN-SDAG-NEXT: s_setpc_b64 s[30:31] +; VI-SDAG-LABEL: v_exp_f32_undef: +; VI-SDAG: ; %bb.0: +; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-SDAG-NEXT: v_rndne_f32_e32 v0, 0 +; VI-SDAG-NEXT: v_sub_f32_e32 v1, 0, v0 +; VI-SDAG-NEXT: v_add_f32_e32 v1, 0x7fc00000, v1 +; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1 +; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v0, v0 +; VI-SDAG-NEXT: v_ldexp_f32 v0, v1, v0 +; VI-SDAG-NEXT: s_setpc_b64 s[30:31] ; -; GCN-GISEL-LABEL: v_exp_f32_undef: -; GCN-GISEL: ; %bb.0: -; GCN-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-GISEL-NEXT: v_mov_b32_e32 v0, 0x3fb8aa3b -; GCN-GISEL-NEXT: v_mul_f32_e32 v0, s4, v0 -; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000 -; GCN-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000 -; GCN-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 -; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc -; GCN-GISEL-NEXT: v_add_f32_e32 v0, v0, v1 -; GCN-GISEL-NEXT: v_exp_f32_e32 v0, v0 -; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0x1f800000 -; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; GCN-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 -; GCN-GISEL-NEXT: s_setpc_b64 s[30:31] +; VI-GISEL-LABEL: v_exp_f32_undef: +; VI-GISEL: ; %bb.0: +; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-GISEL-NEXT: v_sub_f32_e64 v0, s4, 0 +; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x3fb8a000 +; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x39a3b295 +; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x39a3b295, v0 +; VI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8a000, v0 +; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0, v1 +; VI-GISEL-NEXT: 
v_add_f32_e32 v0, v0, v3 +; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0, v2 +; VI-GISEL-NEXT: v_add_f32_e32 v0, v2, v0 +; VI-GISEL-NEXT: v_rndne_f32_e32 v2, v1 +; VI-GISEL-NEXT: v_sub_f32_e32 v1, v1, v2 +; VI-GISEL-NEXT: v_add_f32_e32 v0, v1, v0 +; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v1, v2 +; VI-GISEL-NEXT: v_exp_f32_e32 v0, v0 +; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000 +; VI-GISEL-NEXT: v_ldexp_f32 v0, v0, v1 +; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2ce8ed0 +; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s4, v1 +; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x42b17218 +; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc +; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s4, v1 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc +; VI-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-SDAG-LABEL: v_exp_f32_undef: +; GFX900-SDAG: ; %bb.0: +; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-SDAG-NEXT: v_mov_b32_e32 v0, 0x3fb8aa3b +; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0xffc00000 +; GFX900-SDAG-NEXT: v_fma_f32 v0, s4, v0, v1 +; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x32a5705f +; GFX900-SDAG-NEXT: v_fma_f32 v0, s4, v1, v0 +; GFX900-SDAG-NEXT: v_rndne_f32_e32 v1, 0x7fc00000 +; GFX900-SDAG-NEXT: v_sub_f32_e32 v2, 0x7fc00000, v1 +; GFX900-SDAG-NEXT: v_add_f32_e32 v0, v2, v0 +; GFX900-SDAG-NEXT: v_exp_f32_e32 v0, v0 +; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v1, v1 +; GFX900-SDAG-NEXT: v_ldexp_f32 v0, v0, v1 +; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-GISEL-LABEL: v_exp_f32_undef: +; GFX900-GISEL: ; %bb.0: +; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-GISEL-NEXT: s_mov_b32 s4, 0x3fb8aa3b +; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 +; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, s4, -v0 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x32a5705f +; GFX900-GISEL-NEXT: v_fma_f32 v1, s4, v2, v1 +; GFX900-GISEL-NEXT: v_rndne_f32_e32 v2, v0 +; GFX900-GISEL-NEXT: v_sub_f32_e32 v0, v0, v2 +; GFX900-GISEL-NEXT: v_add_f32_e32 v0, v0, v1 +; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v1, v2 
+; GFX900-GISEL-NEXT: v_exp_f32_e32 v0, v0 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000 +; GFX900-GISEL-NEXT: v_ldexp_f32 v0, v0, v1 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2ce8ed0 +; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s4, v1 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x42b17218 +; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc +; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s4, v1 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc +; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; SI-SDAG-LABEL: v_exp_f32_undef: ; SI-SDAG: ; %bb.0: ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-SDAG-NEXT: v_exp_f32_e32 v0, 0x7fc00000 +; SI-SDAG-NEXT: v_mov_b32_e32 v0, 0x3fb8aa3b +; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0xffc00000 +; SI-SDAG-NEXT: v_fma_f32 v0, s4, v0, v1 +; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x32a5705f +; SI-SDAG-NEXT: v_fma_f32 v0, s4, v1, v0 +; SI-SDAG-NEXT: v_rndne_f32_e32 v1, 0x7fc00000 +; SI-SDAG-NEXT: v_sub_f32_e32 v2, 0x7fc00000, v1 +; SI-SDAG-NEXT: v_add_f32_e32 v0, v2, v0 +; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0 +; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v1, v1 +; SI-SDAG-NEXT: v_ldexp_f32_e32 v0, v0, v1 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; SI-GISEL-LABEL: v_exp_f32_undef: ; SI-GISEL: ; %bb.0: ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-GISEL-NEXT: v_mov_b32_e32 v0, 0x3fb8aa3b -; SI-GISEL-NEXT: v_mul_f32_e32 v0, s4, v0 -; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000 -; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000 -; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 -; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc +; SI-GISEL-NEXT: s_mov_b32 s4, 0x3fb8aa3b +; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 +; SI-GISEL-NEXT: v_fma_f32 v1, v0, s4, -v0 +; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x32a5705f +; SI-GISEL-NEXT: v_fma_f32 v1, s4, v2, v1 +; SI-GISEL-NEXT: v_rndne_f32_e32 v2, v0 +; SI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v2 ; SI-GISEL-NEXT: v_add_f32_e32 v0, v0, v1 +; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v1, v2 ; SI-GISEL-NEXT: 
v_exp_f32_e32 v0, v0 -; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x1f800000 -; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; SI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 +; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000 +; SI-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v1 +; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2ce8ed0 +; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s4, v1 +; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x42b17218 +; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc +; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s4, v1 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc ; SI-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_exp_f32_undef: @@ -2375,68 +5927,154 @@ } define float @v_exp_f32_from_fpext_f16(i16 %src.i) { -; GCN-SDAG-LABEL: v_exp_f32_from_fpext_f16: -; GCN-SDAG: ; %bb.0: -; GCN-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 -; GCN-SDAG-NEXT: s_mov_b32 s4, 0xc2fc0000 -; GCN-SDAG-NEXT: v_mov_b32_e32 v1, 0x42800000 -; GCN-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 -; GCN-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 -; GCN-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc -; GCN-SDAG-NEXT: v_add_f32_e32 v0, v0, v1 -; GCN-SDAG-NEXT: v_exp_f32_e32 v0, v0 -; GCN-SDAG-NEXT: v_mov_b32_e32 v1, 0x1f800000 -; GCN-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; GCN-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 -; GCN-SDAG-NEXT: s_setpc_b64 s[30:31] +; VI-SDAG-LABEL: v_exp_f32_from_fpext_f16: +; VI-SDAG: ; %bb.0: +; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 +; VI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0 +; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 +; VI-SDAG-NEXT: v_sub_f32_e32 v3, v0, v1 +; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v1 +; VI-SDAG-NEXT: v_mul_f32_e32 v5, 0x39a3b295, v3 +; VI-SDAG-NEXT: v_mul_f32_e32 v3, 0x3fb8a000, v3 +; VI-SDAG-NEXT: v_rndne_f32_e32 v4, v2 +; VI-SDAG-NEXT: v_add_f32_e32 v3, v3, v5 +; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1 +; VI-SDAG-NEXT: v_sub_f32_e32 v2, v2, v4 +; 
VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v3 +; VI-SDAG-NEXT: v_add_f32_e32 v1, v2, v1 +; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v4 +; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1 +; VI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 +; VI-SDAG-NEXT: s_mov_b32 s4, 0x42b17218 +; VI-SDAG-NEXT: v_ldexp_f32 v1, v1, v2 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000 +; VI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc +; VI-SDAG-NEXT: s_setpc_b64 s[30:31] ; -; GCN-GISEL-LABEL: v_exp_f32_from_fpext_f16: -; GCN-GISEL: ; %bb.0: -; GCN-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 -; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000 -; GCN-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000 -; GCN-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 -; GCN-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 -; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc -; GCN-GISEL-NEXT: v_add_f32_e32 v0, v0, v1 -; GCN-GISEL-NEXT: v_exp_f32_e32 v0, v0 -; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0x1f800000 -; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; GCN-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 -; GCN-GISEL-NEXT: s_setpc_b64 s[30:31] +; VI-GISEL-LABEL: v_exp_f32_from_fpext_f16: +; VI-GISEL: ; %bb.0: +; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 +; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 +; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1 +; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x39a3b295, v2 +; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v2 +; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3fb8a000, v1 +; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1 +; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v4 +; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 +; VI-GISEL-NEXT: v_rndne_f32_e32 v2, v3 +; VI-GISEL-NEXT: v_sub_f32_e32 v3, v3, v2 +; VI-GISEL-NEXT: v_add_f32_e32 v1, v3, v1 +; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v2 +; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1 +; VI-GISEL-NEXT: 
v_mov_b32_e32 v3, 0x7f800000 +; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 +; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 +; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 +; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218 +; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc +; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc +; VI-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-SDAG-LABEL: v_exp_f32_from_fpext_f16: +; GFX900-SDAG: ; %bb.0: +; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b +; GFX900-SDAG-NEXT: s_mov_b32 s5, 0x32a5705f +; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0 +; GFX900-SDAG-NEXT: v_rndne_f32_e32 v2, v1 +; GFX900-SDAG-NEXT: v_fma_f32 v3, v0, s4, -v1 +; GFX900-SDAG-NEXT: v_sub_f32_e32 v1, v1, v2 +; GFX900-SDAG-NEXT: v_fma_f32 v3, v0, s5, v3 +; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v1, v3 +; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2 +; GFX900-SDAG-NEXT: v_exp_f32_e32 v1, v1 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0 +; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x42b17218 +; GFX900-SDAG-NEXT: v_ldexp_f32 v1, v1, v2 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000 +; GFX900-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc +; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-GISEL-LABEL: v_exp_f32_from_fpext_f16: +; GFX900-GISEL: ; %bb.0: +; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX900-GISEL-NEXT: s_mov_b32 s4, 0x3fb8aa3b +; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x32a5705f +; GFX900-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8aa3b, v0 +; GFX900-GISEL-NEXT: v_fma_f32 v3, v0, s4, -v2 +; GFX900-GISEL-NEXT: v_rndne_f32_e32 v4, v2 +; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, v1, v3 +; GFX900-GISEL-NEXT: 
v_sub_f32_e32 v2, v2, v4 +; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v2, v1 +; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v4 +; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0xc2ce8ed0 +; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v3 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 +; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218 +; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc +; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc +; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; SI-SDAG-LABEL: v_exp_f32_from_fpext_f16: ; SI-SDAG: ; %bb.0: ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 -; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2fc0000 -; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x42800000 -; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 -; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 +; SI-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b +; SI-SDAG-NEXT: s_mov_b32 s5, 0x32a5705f +; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0 +; SI-SDAG-NEXT: v_rndne_f32_e32 v2, v1 +; SI-SDAG-NEXT: v_fma_f32 v3, v0, s4, -v1 +; SI-SDAG-NEXT: v_sub_f32_e32 v1, v1, v2 +; SI-SDAG-NEXT: v_fma_f32 v3, v0, s5, v3 +; SI-SDAG-NEXT: v_add_f32_e32 v1, v1, v3 +; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2 +; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1 +; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0 +; SI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 +; SI-SDAG-NEXT: s_mov_b32 s4, 0x42b17218 +; SI-SDAG-NEXT: v_ldexp_f32_e32 v1, v1, v2 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc -; SI-SDAG-NEXT: v_add_f32_e32 v0, v0, v1 -; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0 -; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x1f800000 -; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; SI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 +; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000 +; SI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc ; SI-SDAG-NEXT: 
s_setpc_b64 s[30:31] ; ; SI-GISEL-LABEL: v_exp_f32_from_fpext_f16: ; SI-GISEL: ; %bb.0: ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 -; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000 -; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000 -; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 -; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 -; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc -; SI-GISEL-NEXT: v_add_f32_e32 v0, v0, v1 -; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0 -; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x1f800000 -; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; SI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 +; SI-GISEL-NEXT: s_mov_b32 s4, 0x3fb8aa3b +; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x32a5705f +; SI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8aa3b, v0 +; SI-GISEL-NEXT: v_fma_f32 v3, v0, s4, -v2 +; SI-GISEL-NEXT: v_rndne_f32_e32 v4, v2 +; SI-GISEL-NEXT: v_fma_f32 v1, v0, v1, v3 +; SI-GISEL-NEXT: v_sub_f32_e32 v2, v2, v4 +; SI-GISEL-NEXT: v_add_f32_e32 v1, v2, v1 +; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v4 +; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1 +; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0xc2ce8ed0 +; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v3 +; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 +; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2 +; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218 +; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc +; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc ; SI-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_exp_f32_from_fpext_f16: @@ -2455,56 +6093,136 @@ } define float @v_exp_f32_from_fpext_math_f16(i16 %src0.i, i16 %src1.i) { -; GCN-SDAG-LABEL: v_exp_f32_from_fpext_math_f16: -; GCN-SDAG: ; %bb.0: -; GCN-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-SDAG-NEXT: v_add_f16_e32 v0, v0, v1 -; GCN-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 -; GCN-SDAG-NEXT: s_mov_b32 s4, 0xc2fc0000 -; GCN-SDAG-NEXT: v_mov_b32_e32 v1, 0x42800000 -; GCN-SDAG-NEXT: v_mul_f32_e32 v0, 
0x3fb8aa3b, v0 -; GCN-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 -; GCN-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc -; GCN-SDAG-NEXT: v_add_f32_e32 v0, v0, v1 -; GCN-SDAG-NEXT: v_exp_f32_e32 v0, v0 -; GCN-SDAG-NEXT: v_mov_b32_e32 v1, 0x1f800000 -; GCN-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; GCN-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 -; GCN-SDAG-NEXT: s_setpc_b64 s[30:31] +; VI-SDAG-LABEL: v_exp_f32_from_fpext_math_f16: +; VI-SDAG: ; %bb.0: +; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-SDAG-NEXT: v_add_f16_e32 v0, v0, v1 +; VI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 +; VI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0 +; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 +; VI-SDAG-NEXT: v_sub_f32_e32 v2, v0, v1 +; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x39a3b295, v2 +; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v2 +; VI-SDAG-NEXT: v_mul_f32_e32 v3, 0x39a3b295, v1 +; VI-SDAG-NEXT: v_add_f32_e32 v2, v2, v4 +; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8a000, v1 +; VI-SDAG-NEXT: v_add_f32_e32 v2, v3, v2 +; VI-SDAG-NEXT: v_rndne_f32_e32 v3, v1 +; VI-SDAG-NEXT: v_sub_f32_e32 v1, v1, v3 +; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 +; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1 +; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v3 +; VI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 +; VI-SDAG-NEXT: s_mov_b32 s4, 0x42b17218 +; VI-SDAG-NEXT: v_ldexp_f32 v1, v1, v2 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000 +; VI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc +; VI-SDAG-NEXT: s_setpc_b64 s[30:31] ; -; GCN-GISEL-LABEL: v_exp_f32_from_fpext_math_f16: -; GCN-GISEL: ; %bb.0: -; GCN-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-GISEL-NEXT: v_add_f16_e32 v0, v0, v1 -; GCN-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 -; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000 -; GCN-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000 -; GCN-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 -; GCN-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 -; 
GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc -; GCN-GISEL-NEXT: v_add_f32_e32 v0, v0, v1 -; GCN-GISEL-NEXT: v_exp_f32_e32 v0, v0 -; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0x1f800000 -; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; GCN-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 -; GCN-GISEL-NEXT: s_setpc_b64 s[30:31] +; VI-GISEL-LABEL: v_exp_f32_from_fpext_math_f16: +; VI-GISEL: ; %bb.0: +; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-GISEL-NEXT: v_add_f16_e32 v0, v0, v1 +; VI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 +; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 +; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1 +; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x39a3b295, v2 +; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v2 +; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3fb8a000, v1 +; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v4 +; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1 +; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 +; VI-GISEL-NEXT: v_rndne_f32_e32 v2, v3 +; VI-GISEL-NEXT: v_sub_f32_e32 v3, v3, v2 +; VI-GISEL-NEXT: v_add_f32_e32 v1, v3, v1 +; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v2 +; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1 +; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 +; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 +; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 +; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 +; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218 +; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc +; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc +; VI-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-SDAG-LABEL: v_exp_f32_from_fpext_math_f16: +; GFX900-SDAG: ; %bb.0: +; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-SDAG-NEXT: v_add_f16_e32 v0, v0, v1 +; GFX900-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b +; GFX900-SDAG-NEXT: s_mov_b32 s5, 0x32a5705f +; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0 +; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1 +; GFX900-SDAG-NEXT: 
v_rndne_f32_e32 v3, v1 +; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s5, v2 +; GFX900-SDAG-NEXT: v_sub_f32_e32 v1, v1, v3 +; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 +; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v3 +; GFX900-SDAG-NEXT: v_exp_f32_e32 v1, v1 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0 +; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x42b17218 +; GFX900-SDAG-NEXT: v_ldexp_f32 v1, v1, v2 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000 +; GFX900-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc +; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-GISEL-LABEL: v_exp_f32_from_fpext_math_f16: +; GFX900-GISEL: ; %bb.0: +; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-GISEL-NEXT: v_add_f16_e32 v0, v0, v1 +; GFX900-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX900-GISEL-NEXT: s_mov_b32 s4, 0x3fb8aa3b +; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x32a5705f +; GFX900-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8aa3b, v0 +; GFX900-GISEL-NEXT: v_fma_f32 v3, v0, s4, -v2 +; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, v1, v3 +; GFX900-GISEL-NEXT: v_rndne_f32_e32 v3, v2 +; GFX900-GISEL-NEXT: v_sub_f32_e32 v2, v2, v3 +; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v2, v1 +; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3 +; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 +; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 +; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218 +; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc +; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc +; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; SI-SDAG-LABEL: v_exp_f32_from_fpext_math_f16: ; SI-SDAG: ; %bb.0: ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; SI-SDAG-NEXT: 
v_cvt_f32_f16_e32 v0, v0 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1 -; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2fc0000 +; SI-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b ; SI-SDAG-NEXT: v_add_f32_e32 v0, v0, v1 -; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 -; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x42800000 -; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 +; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0 +; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1 +; SI-SDAG-NEXT: s_mov_b32 s4, 0x32a5705f +; SI-SDAG-NEXT: v_rndne_f32_e32 v3, v1 +; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, v2 +; SI-SDAG-NEXT: v_sub_f32_e32 v1, v1, v3 +; SI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 +; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1 +; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v3 +; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0 +; SI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 +; SI-SDAG-NEXT: s_mov_b32 s4, 0x42b17218 +; SI-SDAG-NEXT: v_ldexp_f32_e32 v1, v1, v2 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc -; SI-SDAG-NEXT: v_add_f32_e32 v0, v0, v1 -; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0 -; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x1f800000 -; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; SI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 +; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000 +; SI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc ; SI-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; SI-GISEL-LABEL: v_exp_f32_from_fpext_math_f16: @@ -2512,19 +6230,27 @@ ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 ; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1 -; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000 +; SI-GISEL-NEXT: s_mov_b32 s4, 0x3fb8aa3b +; SI-GISEL-NEXT: v_mov_b32_e32 v5, 0x7f800000 ; SI-GISEL-NEXT: v_add_f32_e32 v0, v0, v1 ; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 -; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000 +; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x32a5705f ; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 -; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 -; SI-GISEL-NEXT: v_cmp_lt_f32_e32 
vcc, v0, v1 -; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc -; SI-GISEL-NEXT: v_add_f32_e32 v0, v0, v1 -; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0 -; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x1f800000 -; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; SI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 +; SI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8aa3b, v0 +; SI-GISEL-NEXT: v_fma_f32 v3, v0, s4, -v2 +; SI-GISEL-NEXT: v_rndne_f32_e32 v4, v2 +; SI-GISEL-NEXT: v_fma_f32 v1, v0, v1, v3 +; SI-GISEL-NEXT: v_sub_f32_e32 v2, v2, v4 +; SI-GISEL-NEXT: v_add_f32_e32 v1, v2, v1 +; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v3, v4 +; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1 +; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 +; SI-GISEL-NEXT: v_mov_b32_e32 v4, 0x42b17218 +; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 +; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v3 +; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc +; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v4 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v5, vcc ; SI-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_exp_f32_from_fpext_math_f16: @@ -2545,66 +6271,151 @@ } define float @v_exp_f32_from_fpext_bf16(bfloat %src) { -; GCN-SDAG-LABEL: v_exp_f32_from_fpext_bf16: -; GCN-SDAG: ; %bb.0: -; GCN-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 -; GCN-SDAG-NEXT: s_mov_b32 s4, 0xc2fc0000 -; GCN-SDAG-NEXT: v_mov_b32_e32 v1, 0x42800000 -; GCN-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 -; GCN-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc -; GCN-SDAG-NEXT: v_add_f32_e32 v0, v0, v1 -; GCN-SDAG-NEXT: v_exp_f32_e32 v0, v0 -; GCN-SDAG-NEXT: v_mov_b32_e32 v1, 0x1f800000 -; GCN-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; GCN-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 -; GCN-SDAG-NEXT: s_setpc_b64 s[30:31] +; VI-SDAG-LABEL: v_exp_f32_from_fpext_bf16: +; VI-SDAG: ; %bb.0: +; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 +; VI-SDAG-NEXT: v_sub_f32_e32 v4, v0, v1 +; VI-SDAG-NEXT: 
v_mul_f32_e32 v2, 0x3fb8a000, v1 +; VI-SDAG-NEXT: v_mul_f32_e32 v5, 0x39a3b295, v4 +; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3fb8a000, v4 +; VI-SDAG-NEXT: v_rndne_f32_e32 v3, v2 +; VI-SDAG-NEXT: v_add_f32_e32 v4, v4, v5 +; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1 +; VI-SDAG-NEXT: v_sub_f32_e32 v2, v2, v3 +; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v4 +; VI-SDAG-NEXT: v_add_f32_e32 v1, v2, v1 +; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1 +; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v3 +; VI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0 +; VI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 +; VI-SDAG-NEXT: s_mov_b32 s4, 0x42b17218 +; VI-SDAG-NEXT: v_ldexp_f32 v1, v1, v2 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000 +; VI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc +; VI-SDAG-NEXT: s_setpc_b64 s[30:31] ; -; GCN-GISEL-LABEL: v_exp_f32_from_fpext_bf16: -; GCN-GISEL: ; %bb.0: -; GCN-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 -; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000 -; GCN-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000 -; GCN-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 -; GCN-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 -; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc -; GCN-GISEL-NEXT: v_add_f32_e32 v0, v0, v1 -; GCN-GISEL-NEXT: v_exp_f32_e32 v0, v0 -; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0x1f800000 -; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; GCN-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 -; GCN-GISEL-NEXT: s_setpc_b64 s[30:31] +; VI-GISEL-LABEL: v_exp_f32_from_fpext_bf16: +; VI-GISEL: ; %bb.0: +; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 +; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 +; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1 +; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x39a3b295, v2 +; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v2 +; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3fb8a000, v1 +; 
VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1 +; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v4 +; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 +; VI-GISEL-NEXT: v_rndne_f32_e32 v2, v3 +; VI-GISEL-NEXT: v_sub_f32_e32 v3, v3, v2 +; VI-GISEL-NEXT: v_add_f32_e32 v1, v3, v1 +; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v2 +; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1 +; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 +; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 +; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 +; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 +; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218 +; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc +; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc +; VI-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-SDAG-LABEL: v_exp_f32_from_fpext_bf16: +; GFX900-SDAG: ; %bb.0: +; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b +; GFX900-SDAG-NEXT: v_rndne_f32_e32 v2, v1 +; GFX900-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2 +; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x32a5705f +; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1 +; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v3, v1 +; GFX900-SDAG-NEXT: v_exp_f32_e32 v1, v1 +; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0 +; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x42b17218 +; GFX900-SDAG-NEXT: v_ldexp_f32 v1, v1, v2 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000 +; GFX900-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc +; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-GISEL-LABEL: v_exp_f32_from_fpext_bf16: +; GFX900-GISEL: ; %bb.0: +; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 +; 
GFX900-GISEL-NEXT: s_mov_b32 s4, 0x3fb8aa3b +; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x32a5705f +; GFX900-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8aa3b, v0 +; GFX900-GISEL-NEXT: v_fma_f32 v3, v0, s4, -v2 +; GFX900-GISEL-NEXT: v_rndne_f32_e32 v4, v2 +; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, v1, v3 +; GFX900-GISEL-NEXT: v_sub_f32_e32 v2, v2, v4 +; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v2, v1 +; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v4 +; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0xc2ce8ed0 +; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v3 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 +; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218 +; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc +; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc +; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; SI-SDAG-LABEL: v_exp_f32_from_fpext_bf16: ; SI-SDAG: ; %bb.0: ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 -; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2fc0000 -; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x42800000 -; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 +; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0 +; SI-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b +; SI-SDAG-NEXT: v_rndne_f32_e32 v2, v1 +; SI-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2 +; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1 +; SI-SDAG-NEXT: s_mov_b32 s4, 0x32a5705f +; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1 +; SI-SDAG-NEXT: v_add_f32_e32 v1, v3, v1 +; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1 +; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2 +; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0 +; SI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 +; SI-SDAG-NEXT: s_mov_b32 s4, 0x42b17218 +; SI-SDAG-NEXT: v_ldexp_f32_e32 v1, v1, v2 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc -; SI-SDAG-NEXT: v_add_f32_e32 v0, v0, v1 -; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0 -; SI-SDAG-NEXT: v_mov_b32_e32 v1, 
0x1f800000 -; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; SI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 +; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000 +; SI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc ; SI-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; SI-GISEL-LABEL: v_exp_f32_from_fpext_bf16: ; SI-GISEL: ; %bb.0: ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 -; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000 -; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000 -; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 -; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 -; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc -; SI-GISEL-NEXT: v_add_f32_e32 v0, v0, v1 -; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0 -; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x1f800000 -; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; SI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 +; SI-GISEL-NEXT: s_mov_b32 s4, 0x3fb8aa3b +; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x32a5705f +; SI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8aa3b, v0 +; SI-GISEL-NEXT: v_fma_f32 v3, v0, s4, -v2 +; SI-GISEL-NEXT: v_rndne_f32_e32 v4, v2 +; SI-GISEL-NEXT: v_fma_f32 v1, v0, v1, v3 +; SI-GISEL-NEXT: v_sub_f32_e32 v2, v2, v4 +; SI-GISEL-NEXT: v_add_f32_e32 v1, v2, v1 +; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v4 +; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1 +; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0xc2ce8ed0 +; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v3 +; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 +; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2 +; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218 +; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc +; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc ; SI-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_exp_f32_from_fpext_bf16: @@ -2636,9 +6447,21 @@ ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1 +; SI-SDAG-NEXT: 
s_mov_b32 s4, 0x3fb8aa3b +; SI-SDAG-NEXT: s_mov_b32 s5, 0x32a5705f ; SI-SDAG-NEXT: v_add_f32_e32 v0, v0, v1 -; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 -; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0 +; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0 +; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1 +; SI-SDAG-NEXT: v_rndne_f32_e32 v1, v1 +; SI-SDAG-NEXT: v_fma_f32 v2, v0, s5, v2 +; SI-SDAG-NEXT: v_fma_f32 v3, v0, s4, -v1 +; SI-SDAG-NEXT: v_add_f32_e32 v2, v3, v2 +; SI-SDAG-NEXT: v_exp_f32_e32 v2, v2 +; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v1, v1 +; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0 +; SI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 +; SI-SDAG-NEXT: v_ldexp_f32_e32 v1, v2, v1 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc ; SI-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; SI-GISEL-LABEL: v_exp_f32_from_fpext_math_f16_fast: @@ -2671,23 +6494,134 @@ } define float @v_exp_f32_from_fpext_math_f16_daz(i16 %src0.i, i16 %src1.i) #0 { -; GCN-LABEL: v_exp_f32_from_fpext_math_f16_daz: -; GCN: ; %bb.0: -; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: v_add_f16_e32 v0, v0, v1 -; GCN-NEXT: v_cvt_f32_f16_e32 v0, v0 -; GCN-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 -; GCN-NEXT: v_exp_f32_e32 v0, v0 -; GCN-NEXT: s_setpc_b64 s[30:31] +; VI-SDAG-LABEL: v_exp_f32_from_fpext_math_f16_daz: +; VI-SDAG: ; %bb.0: +; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-SDAG-NEXT: v_add_f16_e32 v0, v0, v1 +; VI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 +; VI-SDAG-NEXT: s_mov_b32 s4, 0x3fb8a000 +; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 +; VI-SDAG-NEXT: v_sub_f32_e32 v2, v0, v1 +; VI-SDAG-NEXT: v_mul_f32_e32 v3, 0x3fb8a000, v1 +; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x39a3b295, v2 +; VI-SDAG-NEXT: v_mac_f32_e32 v4, 0x3fb8a000, v2 +; VI-SDAG-NEXT: v_rndne_f32_e32 v2, v3 +; VI-SDAG-NEXT: v_mac_f32_e32 v4, 0x39a3b295, v1 +; VI-SDAG-NEXT: v_mad_f32 v1, v1, s4, -v2 +; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v4 +; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2 +; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1 +; VI-SDAG-NEXT: 
s_mov_b32 s4, 0xc2ce8ed0 +; VI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 +; VI-SDAG-NEXT: s_mov_b32 s4, 0x42b17218 +; VI-SDAG-NEXT: v_ldexp_f32 v1, v1, v2 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000 +; VI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc +; VI-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; VI-GISEL-LABEL: v_exp_f32_from_fpext_math_f16_daz: +; VI-GISEL: ; %bb.0: +; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-GISEL-NEXT: v_add_f16_e32 v0, v0, v1 +; VI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 +; VI-GISEL-NEXT: s_mov_b32 s4, 0x3fb8a000 +; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 +; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1 +; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3fb8a000, v1 +; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x39a3b295, v2 +; VI-GISEL-NEXT: v_mac_f32_e32 v4, 0x3fb8a000, v2 +; VI-GISEL-NEXT: v_rndne_f32_e32 v2, v3 +; VI-GISEL-NEXT: v_mac_f32_e32 v4, 0x39a3b295, v1 +; VI-GISEL-NEXT: v_mad_f32 v1, v1, s4, -v2 +; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v4 +; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v2 +; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1 +; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 +; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 +; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 +; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 +; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218 +; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc +; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc +; VI-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-SDAG-LABEL: v_exp_f32_from_fpext_math_f16_daz: +; GFX900-SDAG: ; %bb.0: +; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-SDAG-NEXT: v_add_f16_e32 v0, v0, v1 +; GFX900-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v0 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b +; GFX900-SDAG-NEXT: s_mov_b32 s5, 0x32a5705f +; GFX900-SDAG-NEXT: v_mul_f32_e32 v2, 0x3fb8aa3b, v1 +; GFX900-SDAG-NEXT: v_fma_f32 v3, 
v1, s4, -v2 +; GFX900-SDAG-NEXT: v_rndne_f32_e32 v2, v2 +; GFX900-SDAG-NEXT: v_fma_f32 v3, v1, s5, v3 +; GFX900-SDAG-NEXT: v_mad_mix_f32 v0, v0, s4, -v2 op_sel_hi:[1,0,0] +; GFX900-SDAG-NEXT: v_add_f32_e32 v0, v0, v3 +; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2 +; GFX900-SDAG-NEXT: v_exp_f32_e32 v0, v0 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0 +; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v1 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x42b17218 +; GFX900-SDAG-NEXT: v_ldexp_f32 v0, v0, v2 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc +; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000 +; GFX900-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v1 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc +; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-GISEL-LABEL: v_exp_f32_from_fpext_math_f16_daz: +; GFX900-GISEL: ; %bb.0: +; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-GISEL-NEXT: v_add_f16_e32 v0, v0, v1 +; GFX900-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v0 +; GFX900-GISEL-NEXT: s_mov_b32 s4, 0x3fb8aa3b +; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x32a5705f +; GFX900-GISEL-NEXT: v_mul_f32_e32 v3, 0x3fb8aa3b, v1 +; GFX900-GISEL-NEXT: v_fma_f32 v4, v1, s4, -v3 +; GFX900-GISEL-NEXT: v_rndne_f32_e32 v3, v3 +; GFX900-GISEL-NEXT: v_fma_f32 v2, v1, v2, v4 +; GFX900-GISEL-NEXT: v_mad_mix_f32 v0, v0, s4, -v3 op_sel_hi:[1,0,0] +; GFX900-GISEL-NEXT: v_add_f32_e32 v0, v0, v2 +; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3 +; GFX900-GISEL-NEXT: v_exp_f32_e32 v0, v0 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 +; GFX900-GISEL-NEXT: v_ldexp_f32 v0, v0, v2 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 +; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v1, v2 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218 +; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc +; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v1, v2 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc +; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; SI-SDAG-LABEL: v_exp_f32_from_fpext_math_f16_daz: ; 
SI-SDAG: ; %bb.0: ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1 +; SI-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b +; SI-SDAG-NEXT: s_mov_b32 s5, 0x32a5705f ; SI-SDAG-NEXT: v_add_f32_e32 v0, v0, v1 -; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 -; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0 +; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0 +; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1 +; SI-SDAG-NEXT: v_rndne_f32_e32 v1, v1 +; SI-SDAG-NEXT: v_fma_f32 v2, v0, s5, v2 +; SI-SDAG-NEXT: v_mad_f32 v3, v0, s4, -v1 +; SI-SDAG-NEXT: v_add_f32_e32 v2, v3, v2 +; SI-SDAG-NEXT: v_exp_f32_e32 v2, v2 +; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v1, v1 +; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0 +; SI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 +; SI-SDAG-NEXT: s_mov_b32 s4, 0x42b17218 +; SI-SDAG-NEXT: v_ldexp_f32_e32 v1, v2, v1 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000 +; SI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc ; SI-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; SI-GISEL-LABEL: v_exp_f32_from_fpext_math_f16_daz: @@ -2695,11 +6629,27 @@ ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 ; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1 +; SI-GISEL-NEXT: s_mov_b32 s4, 0x3fb8aa3b +; SI-GISEL-NEXT: v_mov_b32_e32 v4, 0x42b17218 +; SI-GISEL-NEXT: v_mov_b32_e32 v5, 0x7f800000 ; SI-GISEL-NEXT: v_add_f32_e32 v0, v0, v1 ; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 +; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x32a5705f ; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 -; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 -; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0 +; SI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8aa3b, v0 +; SI-GISEL-NEXT: v_fma_f32 v3, v0, s4, -v2 +; SI-GISEL-NEXT: v_rndne_f32_e32 v2, v2 +; SI-GISEL-NEXT: v_fma_f32 v1, v0, v1, v3 +; SI-GISEL-NEXT: v_mad_f32 v3, v0, s4, -v2 +; SI-GISEL-NEXT: v_add_f32_e32 v1, v3, 
v1 +; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v2 +; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1 +; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0xc2ce8ed0 +; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v3 +; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2 +; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc +; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v4 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v5, vcc ; SI-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_exp_f32_from_fpext_math_f16_daz: @@ -2724,41 +6674,29 @@ ; GCN-LABEL: v_exp_f16: ; GCN: ; %bb.0: ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: v_mul_f16_e32 v0, 0x3dc5, v0 -; GCN-NEXT: v_exp_f16_e32 v0, v0 +; GCN-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GCN-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 +; GCN-NEXT: v_exp_f32_e32 v0, v0 +; GCN-NEXT: v_cvt_f16_f32_e32 v0, v0 ; GCN-NEXT: s_setpc_b64 s[30:31] ; ; SI-SDAG-LABEL: v_exp_f16: ; SI-SDAG: ; %bb.0: ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 -; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2fc0000 -; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x42800000 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 ; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 -; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 -; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc -; SI-SDAG-NEXT: v_add_f32_e32 v0, v0, v1 ; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0 -; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x1f800000 -; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; SI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 +; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 +; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; SI-GISEL-LABEL: v_exp_f16: ; SI-GISEL: ; %bb.0: ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 -; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000 -; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000 ; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 -; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 -; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc 
-; SI-GISEL-NEXT: v_add_f32_e32 v0, v0, v1 ; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0 -; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x1f800000 -; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; SI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 ; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 ; SI-GISEL-NEXT: s_setpc_b64 s[30:31] ; @@ -2776,53 +6714,32 @@ } define half @v_exp_fabs_f16(half %in) { -; GCN-SDAG-LABEL: v_exp_fabs_f16: -; GCN-SDAG: ; %bb.0: -; GCN-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-SDAG-NEXT: s_movk_i32 s4, 0x3dc5 -; GCN-SDAG-NEXT: v_mul_f16_e64 v0, |v0|, s4 -; GCN-SDAG-NEXT: v_exp_f16_e32 v0, v0 -; GCN-SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GCN-GISEL-LABEL: v_exp_fabs_f16: -; GCN-GISEL: ; %bb.0: -; GCN-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0x3dc5 -; GCN-GISEL-NEXT: v_mul_f16_e64 v0, |v0|, v1 -; GCN-GISEL-NEXT: v_exp_f16_e32 v0, v0 -; GCN-GISEL-NEXT: s_setpc_b64 s[30:31] +; GCN-LABEL: v_exp_fabs_f16: +; GCN: ; %bb.0: +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: v_cvt_f32_f16_e64 v0, |v0| +; GCN-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 +; GCN-NEXT: v_exp_f32_e32 v0, v0 +; GCN-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GCN-NEXT: s_setpc_b64 s[30:31] ; ; SI-SDAG-LABEL: v_exp_fabs_f16: ; SI-SDAG: ; %bb.0: ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 -; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2fc0000 -; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x42800000 ; SI-SDAG-NEXT: v_cvt_f32_f16_e64 v0, |v0| ; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 -; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 -; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc -; SI-SDAG-NEXT: v_add_f32_e32 v0, v0, v1 ; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0 -; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x1f800000 -; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; SI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 +; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 +; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 ; SI-SDAG-NEXT: s_setpc_b64 
s[30:31] ; ; SI-GISEL-LABEL: v_exp_fabs_f16: ; SI-GISEL: ; %bb.0: ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; SI-GISEL-NEXT: v_cvt_f32_f16_e64 v0, |v0| -; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000 -; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000 ; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 -; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 -; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc -; SI-GISEL-NEXT: v_add_f32_e32 v0, v0, v1 ; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0 -; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x1f800000 -; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; SI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 ; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 ; SI-GISEL-NEXT: s_setpc_b64 s[30:31] ; @@ -2844,50 +6761,38 @@ ; GCN-SDAG-LABEL: v_exp_fneg_fabs_f16: ; GCN-SDAG: ; %bb.0: ; GCN-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-SDAG-NEXT: s_mov_b32 s4, 0xbdc5 -; GCN-SDAG-NEXT: v_mul_f16_e64 v0, |v0|, s4 -; GCN-SDAG-NEXT: v_exp_f16_e32 v0, v0 +; GCN-SDAG-NEXT: v_cvt_f32_f16_e64 v0, |v0| +; GCN-SDAG-NEXT: v_mul_f32_e32 v0, 0xbfb8aa3b, v0 +; GCN-SDAG-NEXT: v_exp_f32_e32 v0, v0 +; GCN-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 ; GCN-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; GCN-GISEL-LABEL: v_exp_fneg_fabs_f16: ; GCN-GISEL: ; %bb.0: ; GCN-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0x3dc5 -; GCN-GISEL-NEXT: v_mul_f16_e64 v0, -|v0|, v1 -; GCN-GISEL-NEXT: v_exp_f16_e32 v0, v0 +; GCN-GISEL-NEXT: v_cvt_f32_f16_e64 v0, -|v0| +; GCN-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 +; GCN-GISEL-NEXT: v_exp_f32_e32 v0, v0 +; GCN-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 ; GCN-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; SI-SDAG-LABEL: v_exp_fneg_fabs_f16: ; SI-SDAG: ; %bb.0: ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 -; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2fc0000 -; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x42800000 -; SI-SDAG-NEXT: v_cvt_f32_f16_e64 v0, -|v0| -; SI-SDAG-NEXT: v_mul_f32_e32 v0, 
0x3fb8aa3b, v0 -; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 -; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc -; SI-SDAG-NEXT: v_add_f32_e32 v0, v0, v1 +; SI-SDAG-NEXT: v_cvt_f32_f16_e64 v0, |v0| +; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0xbfb8aa3b, v0 ; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0 -; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x1f800000 -; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; SI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 +; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 +; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; SI-GISEL-LABEL: v_exp_fneg_fabs_f16: ; SI-GISEL: ; %bb.0: ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; SI-GISEL-NEXT: v_cvt_f32_f16_e64 v0, -|v0| -; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000 -; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000 ; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 -; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 -; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc -; SI-GISEL-NEXT: v_add_f32_e32 v0, v0, v1 ; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0 -; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x1f800000 -; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; SI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 ; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 ; SI-GISEL-NEXT: s_setpc_b64 s[30:31] ; @@ -2910,49 +6815,38 @@ ; GCN-SDAG-LABEL: v_exp_fneg_f16: ; GCN-SDAG: ; %bb.0: ; GCN-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-SDAG-NEXT: v_mul_f16_e32 v0, 0xbdc5, v0 -; GCN-SDAG-NEXT: v_exp_f16_e32 v0, v0 +; GCN-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GCN-SDAG-NEXT: v_mul_f32_e32 v0, 0xbfb8aa3b, v0 +; GCN-SDAG-NEXT: v_exp_f32_e32 v0, v0 +; GCN-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 ; GCN-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; GCN-GISEL-LABEL: v_exp_fneg_f16: ; GCN-GISEL: ; %bb.0: ; GCN-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0x3dc5 -; GCN-GISEL-NEXT: v_mul_f16_e64 v0, -v0, v1 -; GCN-GISEL-NEXT: v_exp_f16_e32 v0, v0 +; GCN-GISEL-NEXT: v_cvt_f32_f16_e64 v0, -v0 +; 
GCN-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 +; GCN-GISEL-NEXT: v_exp_f32_e32 v0, v0 +; GCN-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 ; GCN-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; SI-SDAG-LABEL: v_exp_fneg_f16: ; SI-SDAG: ; %bb.0: ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; SI-SDAG-NEXT: v_cvt_f16_f32_e64 v0, -v0 -; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2fc0000 -; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x42800000 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 ; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 -; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 -; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc -; SI-SDAG-NEXT: v_add_f32_e32 v0, v0, v1 ; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0 -; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x1f800000 -; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; SI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 +; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 +; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; SI-GISEL-LABEL: v_exp_fneg_f16: ; SI-GISEL: ; %bb.0: ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; SI-GISEL-NEXT: v_cvt_f32_f16_e64 v0, -v0 -; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000 -; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000 ; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 -; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 -; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc -; SI-GISEL-NEXT: v_add_f32_e32 v0, v0, v1 ; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0 -; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x1f800000 -; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; SI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 ; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 ; SI-GISEL-NEXT: s_setpc_b64 s[30:31] ; @@ -2982,24 +6876,21 @@ ; SI-SDAG: ; %bb.0: ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 -; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2fc0000 -; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x42800000 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 -; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 -; SI-SDAG-NEXT: v_cmp_gt_f32_e32 
vcc, s4, v0 -; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc -; SI-SDAG-NEXT: v_add_f32_e32 v0, v0, v1 +; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8a000, v0 ; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0 -; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x1f800000 -; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; SI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 +; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 +; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; SI-GISEL-LABEL: v_exp_f16_fast: ; SI-GISEL: ; %bb.0: ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 -; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 +; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v1, 0x3dc5 +; SI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 +; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 +; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 ; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0 ; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 ; SI-GISEL-NEXT: s_setpc_b64 s[30:31] @@ -3021,70 +6912,60 @@ ; VI-SDAG-LABEL: v_exp_v2f16: ; VI-SDAG: ; %bb.0: ; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x3dc5 -; VI-SDAG-NEXT: v_mul_f16_sdwa v1, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; VI-SDAG-NEXT: v_mul_f16_e32 v0, 0x3dc5, v0 -; VI-SDAG-NEXT: v_exp_f16_sdwa v1, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD -; VI-SDAG-NEXT: v_exp_f16_e32 v0, v0 +; VI-SDAG-NEXT: v_cvt_f32_f16_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; VI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 +; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1 +; VI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 +; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1 +; VI-SDAG-NEXT: v_exp_f32_e32 v0, v0 +; VI-SDAG-NEXT: v_cvt_f16_f32_sdwa v1, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD +; VI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 ; VI-SDAG-NEXT: v_or_b32_e32 v0, v0, v1 ; VI-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; VI-GISEL-LABEL: v_exp_v2f16: ; VI-GISEL: ; %bb.0: ; 
VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x3dc5 -; VI-GISEL-NEXT: v_mul_f16_e32 v1, 0x3dc5, v0 -; VI-GISEL-NEXT: v_mul_f16_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; VI-GISEL-NEXT: v_exp_f16_e32 v1, v1 -; VI-GISEL-NEXT: v_exp_f16_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD +; VI-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v0 +; VI-GISEL-NEXT: v_cvt_f32_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1 +; VI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 +; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1 +; VI-GISEL-NEXT: v_exp_f32_e32 v0, v0 +; VI-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1 +; VI-GISEL-NEXT: v_cvt_f16_f32_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD ; VI-GISEL-NEXT: v_or_b32_e32 v0, v1, v0 ; VI-GISEL-NEXT: s_setpc_b64 s[30:31] ; -; GFX900-SDAG-LABEL: v_exp_v2f16: -; GFX900-SDAG: ; %bb.0: -; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX900-SDAG-NEXT: s_movk_i32 s4, 0x3dc5 -; GFX900-SDAG-NEXT: v_pk_mul_f16 v0, v0, s4 op_sel_hi:[1,0] -; GFX900-SDAG-NEXT: v_exp_f16_e32 v1, v0 -; GFX900-SDAG-NEXT: v_exp_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -; GFX900-SDAG-NEXT: v_pack_b32_f16 v0, v1, v0 -; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GFX900-GISEL-LABEL: v_exp_v2f16: -; GFX900-GISEL: ; %bb.0: -; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX900-GISEL-NEXT: s_movk_i32 s4, 0x3dc5 -; GFX900-GISEL-NEXT: v_mul_f16_e32 v1, 0x3dc5, v0 -; GFX900-GISEL-NEXT: v_mul_f16_sdwa v0, v0, s4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX900-GISEL-NEXT: v_exp_f16_e32 v1, v1 -; GFX900-GISEL-NEXT: v_exp_f16_e32 v0, v0 -; GFX900-GISEL-NEXT: v_lshl_or_b32 v0, v0, 16, v1 -; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: v_exp_v2f16: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 
+; GFX900-NEXT: v_cvt_f32_f16_e32 v1, v0 +; GFX900-NEXT: v_cvt_f32_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; GFX900-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1 +; GFX900-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 +; GFX900-NEXT: v_exp_f32_e32 v1, v1 +; GFX900-NEXT: v_exp_f32_e32 v0, v0 +; GFX900-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX900-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX900-NEXT: v_pack_b32_f16 v0, v1, v0 +; GFX900-NEXT: s_setpc_b64 s[30:31] ; ; SI-SDAG-LABEL: v_exp_v2f16: ; SI-SDAG: ; %bb.0: ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1 -; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2fc0000 -; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x42800000 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1 ; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 -; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 ; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1 -; SI-SDAG-NEXT: v_cndmask_b32_e32 v3, 0, v2, vcc -; SI-SDAG-NEXT: v_cmp_gt_f32_e64 s[4:5], s4, v1 -; SI-SDAG-NEXT: v_add_f32_e32 v0, v0, v3 -; SI-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, v2, s[4:5] ; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0 -; SI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 ; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1 -; SI-SDAG-NEXT: v_mov_b32_e32 v3, 0x1f800000 -; SI-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v3, vcc -; SI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2 -; SI-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, v3, s[4:5] -; SI-SDAG-NEXT: v_mul_f32_e32 v1, v1, v2 +; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 +; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1 +; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; SI-GISEL-LABEL: v_exp_v2f16: @@ -3092,23 +6973,10 @@ ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 ; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1 -; SI-GISEL-NEXT: s_mov_b32 s4, 0xc2fc0000 -; SI-GISEL-NEXT: v_mov_b32_e32 
v2, 0x42800000 ; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 -; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 -; SI-GISEL-NEXT: v_cndmask_b32_e32 v3, 0, v2, vcc -; SI-GISEL-NEXT: v_add_f32_e32 v0, v0, v3 -; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x1f800000 ; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1 -; SI-GISEL-NEXT: v_cndmask_b32_e32 v4, 1.0, v3, vcc -; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s4, v1 -; SI-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc -; SI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 ; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0 ; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1 -; SI-GISEL-NEXT: v_cndmask_b32_e32 v2, 1.0, v3, vcc -; SI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v4 -; SI-GISEL-NEXT: v_mul_f32_e32 v1, v1, v2 ; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 ; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1 ; SI-GISEL-NEXT: s_setpc_b64 s[30:31] @@ -3130,12 +6998,14 @@ ; VI-SDAG-LABEL: v_exp_fabs_v2f16: ; VI-SDAG: ; %bb.0: ; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; VI-SDAG-NEXT: s_movk_i32 s4, 0x3dc5 -; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x3dc5 -; VI-SDAG-NEXT: v_mul_f16_sdwa v1, |v0|, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; VI-SDAG-NEXT: v_mul_f16_e64 v0, |v0|, s4 -; VI-SDAG-NEXT: v_exp_f16_sdwa v1, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD -; VI-SDAG-NEXT: v_exp_f16_e32 v0, v0 +; VI-SDAG-NEXT: v_cvt_f32_f16_sdwa v1, |v0| dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; VI-SDAG-NEXT: v_cvt_f32_f16_e64 v0, |v0| +; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1 +; VI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 +; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1 +; VI-SDAG-NEXT: v_exp_f32_e32 v0, v0 +; VI-SDAG-NEXT: v_cvt_f16_f32_sdwa v1, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD +; VI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 ; VI-SDAG-NEXT: v_or_b32_e32 v0, v0, v1 ; VI-SDAG-NEXT: s_setpc_b64 s[30:31] ; @@ -3143,22 +7013,28 @@ ; VI-GISEL: ; %bb.0: ; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; VI-GISEL-NEXT: 
v_and_b32_e32 v0, 0x7fff7fff, v0 -; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x3dc5 -; VI-GISEL-NEXT: v_mul_f16_e32 v1, 0x3dc5, v0 -; VI-GISEL-NEXT: v_mul_f16_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; VI-GISEL-NEXT: v_exp_f16_e32 v1, v1 -; VI-GISEL-NEXT: v_exp_f16_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD +; VI-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v0 +; VI-GISEL-NEXT: v_cvt_f32_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1 +; VI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 +; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1 +; VI-GISEL-NEXT: v_exp_f32_e32 v0, v0 +; VI-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1 +; VI-GISEL-NEXT: v_cvt_f16_f32_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD ; VI-GISEL-NEXT: v_or_b32_e32 v0, v1, v0 ; VI-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX900-SDAG-LABEL: v_exp_fabs_v2f16: ; GFX900-SDAG: ; %bb.0: ; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX900-SDAG-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v0 -; GFX900-SDAG-NEXT: s_movk_i32 s4, 0x3dc5 -; GFX900-SDAG-NEXT: v_pk_mul_f16 v0, v0, s4 op_sel_hi:[1,0] -; GFX900-SDAG-NEXT: v_exp_f16_e32 v1, v0 -; GFX900-SDAG-NEXT: v_exp_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; GFX900-SDAG-NEXT: v_cvt_f32_f16_e64 v1, |v0| +; GFX900-SDAG-NEXT: v_cvt_f32_f16_sdwa v0, |v0| dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1 +; GFX900-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 +; GFX900-SDAG-NEXT: v_exp_f32_e32 v1, v1 +; GFX900-SDAG-NEXT: v_exp_f32_e32 v0, v0 +; GFX900-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX900-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 ; GFX900-SDAG-NEXT: v_pack_b32_f16 v0, v1, v0 ; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] ; @@ -3166,12 +7042,15 @@ ; GFX900-GISEL: ; %bb.0: ; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-GISEL-NEXT: v_and_b32_e32 v0, 
0x7fff7fff, v0 -; GFX900-GISEL-NEXT: s_movk_i32 s4, 0x3dc5 -; GFX900-GISEL-NEXT: v_mul_f16_e32 v1, 0x3dc5, v0 -; GFX900-GISEL-NEXT: v_mul_f16_sdwa v0, v0, s4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX900-GISEL-NEXT: v_exp_f16_e32 v1, v1 -; GFX900-GISEL-NEXT: v_exp_f16_e32 v0, v0 -; GFX900-GISEL-NEXT: v_lshl_or_b32 v0, v0, 16, v1 +; GFX900-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v0 +; GFX900-GISEL-NEXT: v_cvt_f32_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1 +; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 +; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1 +; GFX900-GISEL-NEXT: v_exp_f32_e32 v0, v0 +; GFX900-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX900-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX900-GISEL-NEXT: v_pack_b32_f16 v0, v1, v0 ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; SI-SDAG-LABEL: v_exp_fabs_v2f16: @@ -3179,25 +7058,16 @@ ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1 -; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2fc0000 -; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x42800000 ; SI-SDAG-NEXT: v_cvt_f32_f16_e64 v0, |v0| ; SI-SDAG-NEXT: v_cvt_f32_f16_e64 v1, |v1| ; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 -; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 ; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1 -; SI-SDAG-NEXT: v_cndmask_b32_e32 v3, 0, v2, vcc -; SI-SDAG-NEXT: v_cmp_gt_f32_e64 s[4:5], s4, v1 -; SI-SDAG-NEXT: v_add_f32_e32 v0, v0, v3 -; SI-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, v2, s[4:5] ; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0 -; SI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 ; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1 -; SI-SDAG-NEXT: v_mov_b32_e32 v3, 0x1f800000 -; SI-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v3, vcc -; SI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2 -; SI-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, v3, s[4:5] -; SI-SDAG-NEXT: v_mul_f32_e32 v1, v1, v2 +; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 +; 
SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1 +; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; SI-GISEL-LABEL: v_exp_fabs_v2f16: @@ -3210,25 +7080,12 @@ ; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v0 ; SI-GISEL-NEXT: v_lshrrev_b32_e32 v0, 16, v0 ; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 -; SI-GISEL-NEXT: s_mov_b32 s4, 0xc2fc0000 ; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1 -; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000 -; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s4, v1 -; SI-GISEL-NEXT: v_cndmask_b32_e32 v3, 0, v2, vcc -; SI-GISEL-NEXT: v_add_f32_e32 v1, v1, v3 -; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x1f800000 -; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 -; SI-GISEL-NEXT: v_cndmask_b32_e32 v4, 1.0, v3, vcc -; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 -; SI-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc ; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1 -; SI-GISEL-NEXT: v_add_f32_e32 v0, v0, v2 +; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 ; SI-GISEL-NEXT: v_exp_f32_e32 v2, v0 -; SI-GISEL-NEXT: v_mul_f32_e32 v0, v1, v4 -; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v3, vcc -; SI-GISEL-NEXT: v_mul_f32_e32 v1, v2, v1 -; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 -; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1 +; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v1 +; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v2 ; SI-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_exp_fabs_v2f16: @@ -3249,12 +7106,14 @@ ; VI-SDAG-LABEL: v_exp_fneg_fabs_v2f16: ; VI-SDAG: ; %bb.0: ; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; VI-SDAG-NEXT: s_movk_i32 s4, 0x3dc5 -; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x3dc5 -; VI-SDAG-NEXT: v_mul_f16_sdwa v1, -|v0|, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; VI-SDAG-NEXT: v_mul_f16_e64 v0, -|v0|, s4 -; VI-SDAG-NEXT: v_exp_f16_sdwa v1, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD -; VI-SDAG-NEXT: v_exp_f16_e32 v0, v0 +; VI-SDAG-NEXT: v_cvt_f32_f16_sdwa v1, -|v0| 
dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; VI-SDAG-NEXT: v_cvt_f32_f16_e64 v0, -|v0| +; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1 +; VI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 +; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1 +; VI-SDAG-NEXT: v_exp_f32_e32 v0, v0 +; VI-SDAG-NEXT: v_cvt_f16_f32_sdwa v1, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD +; VI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 ; VI-SDAG-NEXT: v_or_b32_e32 v0, v0, v1 ; VI-SDAG-NEXT: s_setpc_b64 s[30:31] ; @@ -3262,22 +7121,28 @@ ; VI-GISEL: ; %bb.0: ; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; VI-GISEL-NEXT: v_or_b32_e32 v0, 0x80008000, v0 -; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x3dc5 -; VI-GISEL-NEXT: v_mul_f16_e32 v1, 0x3dc5, v0 -; VI-GISEL-NEXT: v_mul_f16_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; VI-GISEL-NEXT: v_exp_f16_e32 v1, v1 -; VI-GISEL-NEXT: v_exp_f16_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD +; VI-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v0 +; VI-GISEL-NEXT: v_cvt_f32_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1 +; VI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 +; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1 +; VI-GISEL-NEXT: v_exp_f32_e32 v0, v0 +; VI-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1 +; VI-GISEL-NEXT: v_cvt_f16_f32_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD ; VI-GISEL-NEXT: v_or_b32_e32 v0, v1, v0 ; VI-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX900-SDAG-LABEL: v_exp_fneg_fabs_v2f16: ; GFX900-SDAG: ; %bb.0: ; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX900-SDAG-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v0 -; GFX900-SDAG-NEXT: s_mov_b32 s4, 0xbdc5 -; GFX900-SDAG-NEXT: v_pk_mul_f16 v0, v0, s4 op_sel_hi:[1,0] -; GFX900-SDAG-NEXT: v_exp_f16_e32 v1, v0 -; GFX900-SDAG-NEXT: v_exp_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; GFX900-SDAG-NEXT: v_cvt_f32_f16_e64 v1, -|v0| +; 
GFX900-SDAG-NEXT: v_cvt_f32_f16_sdwa v0, -|v0| dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1 +; GFX900-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 +; GFX900-SDAG-NEXT: v_exp_f32_e32 v1, v1 +; GFX900-SDAG-NEXT: v_exp_f32_e32 v0, v0 +; GFX900-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX900-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 ; GFX900-SDAG-NEXT: v_pack_b32_f16 v0, v1, v0 ; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] ; @@ -3285,12 +7150,15 @@ ; GFX900-GISEL: ; %bb.0: ; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-GISEL-NEXT: v_or_b32_e32 v0, 0x80008000, v0 -; GFX900-GISEL-NEXT: s_movk_i32 s4, 0x3dc5 -; GFX900-GISEL-NEXT: v_mul_f16_e32 v1, 0x3dc5, v0 -; GFX900-GISEL-NEXT: v_mul_f16_sdwa v0, v0, s4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX900-GISEL-NEXT: v_exp_f16_e32 v1, v1 -; GFX900-GISEL-NEXT: v_exp_f16_e32 v0, v0 -; GFX900-GISEL-NEXT: v_lshl_or_b32 v0, v0, 16, v1 +; GFX900-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v0 +; GFX900-GISEL-NEXT: v_cvt_f32_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1 +; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 +; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1 +; GFX900-GISEL-NEXT: v_exp_f32_e32 v0, v0 +; GFX900-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX900-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX900-GISEL-NEXT: v_pack_b32_f16 v0, v1, v0 ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; SI-SDAG-LABEL: v_exp_fneg_fabs_v2f16: @@ -3298,8 +7166,6 @@ ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 -; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2fc0000 -; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x42800000 ; SI-SDAG-NEXT: v_lshlrev_b32_e32 v1, 16, v1 ; SI-SDAG-NEXT: v_or_b32_e32 v0, v0, v1 ; SI-SDAG-NEXT: v_or_b32_e32 v0, 0x80008000, v0 @@ -3307,20 +7173,13 @@ ; SI-SDAG-NEXT: v_lshrrev_b32_e32 v0, 16, 
v0 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 ; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1 -; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v1 -; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 -; SI-SDAG-NEXT: v_cndmask_b32_e32 v3, 0, v2, vcc -; SI-SDAG-NEXT: v_cmp_gt_f32_e64 s[4:5], s4, v0 -; SI-SDAG-NEXT: v_add_f32_e32 v1, v1, v3 -; SI-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, v2, s[4:5] ; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1 -; SI-SDAG-NEXT: v_add_f32_e32 v0, v0, v2 -; SI-SDAG-NEXT: v_exp_f32_e32 v2, v0 -; SI-SDAG-NEXT: v_mov_b32_e32 v3, 0x1f800000 -; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, 1.0, v3, vcc -; SI-SDAG-NEXT: v_mul_f32_e32 v0, v1, v0 -; SI-SDAG-NEXT: v_cndmask_b32_e64 v1, 1.0, v3, s[4:5] -; SI-SDAG-NEXT: v_mul_f32_e32 v1, v2, v1 +; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 +; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0 +; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1 +; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v0 +; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v1 +; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v2 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; SI-GISEL-LABEL: v_exp_fneg_fabs_v2f16: @@ -3333,25 +7192,12 @@ ; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v0 ; SI-GISEL-NEXT: v_lshrrev_b32_e32 v0, 16, v0 ; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 -; SI-GISEL-NEXT: s_mov_b32 s4, 0xc2fc0000 ; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1 -; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000 -; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s4, v1 -; SI-GISEL-NEXT: v_cndmask_b32_e32 v3, 0, v2, vcc -; SI-GISEL-NEXT: v_add_f32_e32 v1, v1, v3 -; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x1f800000 -; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 -; SI-GISEL-NEXT: v_cndmask_b32_e32 v4, 1.0, v3, vcc -; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 -; SI-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc ; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1 -; SI-GISEL-NEXT: v_add_f32_e32 v0, v0, v2 +; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 ; SI-GISEL-NEXT: v_exp_f32_e32 v2, v0 -; SI-GISEL-NEXT: v_mul_f32_e32 v0, v1, v4 -; SI-GISEL-NEXT: 
v_cndmask_b32_e32 v1, 1.0, v3, vcc -; SI-GISEL-NEXT: v_mul_f32_e32 v1, v2, v1 -; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 -; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1 +; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v1 +; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v2 ; SI-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_exp_fneg_fabs_v2f16: @@ -3373,12 +7219,14 @@ ; VI-SDAG-LABEL: v_exp_fneg_v2f16: ; VI-SDAG: ; %bb.0: ; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; VI-SDAG-NEXT: s_movk_i32 s4, 0x3dc5 -; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x3dc5 -; VI-SDAG-NEXT: v_mul_f16_sdwa v1, -v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; VI-SDAG-NEXT: v_mul_f16_e64 v0, -v0, s4 -; VI-SDAG-NEXT: v_exp_f16_sdwa v1, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD -; VI-SDAG-NEXT: v_exp_f16_e32 v0, v0 +; VI-SDAG-NEXT: v_cvt_f32_f16_sdwa v1, -v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; VI-SDAG-NEXT: v_cvt_f32_f16_e64 v0, -v0 +; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1 +; VI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 +; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1 +; VI-SDAG-NEXT: v_exp_f32_e32 v0, v0 +; VI-SDAG-NEXT: v_cvt_f16_f32_sdwa v1, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD +; VI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 ; VI-SDAG-NEXT: v_or_b32_e32 v0, v0, v1 ; VI-SDAG-NEXT: s_setpc_b64 s[30:31] ; @@ -3386,21 +7234,28 @@ ; VI-GISEL: ; %bb.0: ; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; VI-GISEL-NEXT: v_xor_b32_e32 v0, 0x80008000, v0 -; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x3dc5 -; VI-GISEL-NEXT: v_mul_f16_e32 v1, 0x3dc5, v0 -; VI-GISEL-NEXT: v_mul_f16_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; VI-GISEL-NEXT: v_exp_f16_e32 v1, v1 -; VI-GISEL-NEXT: v_exp_f16_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD +; VI-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v0 +; VI-GISEL-NEXT: v_cvt_f32_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; VI-GISEL-NEXT: 
v_mul_f32_e32 v1, 0x3fb8aa3b, v1 +; VI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 +; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1 +; VI-GISEL-NEXT: v_exp_f32_e32 v0, v0 +; VI-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1 +; VI-GISEL-NEXT: v_cvt_f16_f32_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD ; VI-GISEL-NEXT: v_or_b32_e32 v0, v1, v0 ; VI-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX900-SDAG-LABEL: v_exp_fneg_v2f16: ; GFX900-SDAG: ; %bb.0: ; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX900-SDAG-NEXT: s_mov_b32 s4, 0xbdc5 -; GFX900-SDAG-NEXT: v_pk_mul_f16 v0, v0, s4 op_sel_hi:[1,0] -; GFX900-SDAG-NEXT: v_exp_f16_e32 v1, v0 -; GFX900-SDAG-NEXT: v_exp_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; GFX900-SDAG-NEXT: v_cvt_f32_f16_e64 v1, -v0 +; GFX900-SDAG-NEXT: v_cvt_f32_f16_sdwa v0, -v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1 +; GFX900-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 +; GFX900-SDAG-NEXT: v_exp_f32_e32 v1, v1 +; GFX900-SDAG-NEXT: v_exp_f32_e32 v0, v0 +; GFX900-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX900-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 ; GFX900-SDAG-NEXT: v_pack_b32_f16 v0, v1, v0 ; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] ; @@ -3408,12 +7263,15 @@ ; GFX900-GISEL: ; %bb.0: ; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-GISEL-NEXT: v_xor_b32_e32 v0, 0x80008000, v0 -; GFX900-GISEL-NEXT: s_movk_i32 s4, 0x3dc5 -; GFX900-GISEL-NEXT: v_mul_f16_e32 v1, 0x3dc5, v0 -; GFX900-GISEL-NEXT: v_mul_f16_sdwa v0, v0, s4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX900-GISEL-NEXT: v_exp_f16_e32 v1, v1 -; GFX900-GISEL-NEXT: v_exp_f16_e32 v0, v0 -; GFX900-GISEL-NEXT: v_lshl_or_b32 v0, v0, 16, v1 +; GFX900-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v0 +; GFX900-GISEL-NEXT: v_cvt_f32_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1 +; GFX900-GISEL-NEXT: 
v_mul_f32_e32 v0, 0x3fb8aa3b, v0 +; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1 +; GFX900-GISEL-NEXT: v_exp_f32_e32 v0, v0 +; GFX900-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX900-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX900-GISEL-NEXT: v_pack_b32_f16 v0, v1, v0 ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; SI-SDAG-LABEL: v_exp_fneg_v2f16: @@ -3421,8 +7279,6 @@ ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 -; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2fc0000 -; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x42800000 ; SI-SDAG-NEXT: v_lshlrev_b32_e32 v1, 16, v1 ; SI-SDAG-NEXT: v_or_b32_e32 v0, v0, v1 ; SI-SDAG-NEXT: v_xor_b32_e32 v0, 0x80008000, v0 @@ -3430,20 +7286,13 @@ ; SI-SDAG-NEXT: v_lshrrev_b32_e32 v0, 16, v0 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 ; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1 -; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v1 -; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 -; SI-SDAG-NEXT: v_cndmask_b32_e32 v3, 0, v2, vcc -; SI-SDAG-NEXT: v_cmp_gt_f32_e64 s[4:5], s4, v0 -; SI-SDAG-NEXT: v_add_f32_e32 v1, v1, v3 -; SI-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, v2, s[4:5] ; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1 -; SI-SDAG-NEXT: v_add_f32_e32 v0, v0, v2 -; SI-SDAG-NEXT: v_exp_f32_e32 v2, v0 -; SI-SDAG-NEXT: v_mov_b32_e32 v3, 0x1f800000 -; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, 1.0, v3, vcc -; SI-SDAG-NEXT: v_mul_f32_e32 v0, v1, v0 -; SI-SDAG-NEXT: v_cndmask_b32_e64 v1, 1.0, v3, s[4:5] -; SI-SDAG-NEXT: v_mul_f32_e32 v1, v2, v1 +; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 +; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0 +; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1 +; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v0 +; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v1 +; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v2 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; SI-GISEL-LABEL: v_exp_fneg_v2f16: @@ -3456,25 +7305,12 @@ ; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v0 ; SI-GISEL-NEXT: v_lshrrev_b32_e32 v0, 16, v0 ; SI-GISEL-NEXT: 
v_cvt_f32_f16_e32 v0, v0 -; SI-GISEL-NEXT: s_mov_b32 s4, 0xc2fc0000 ; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1 -; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000 -; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s4, v1 -; SI-GISEL-NEXT: v_cndmask_b32_e32 v3, 0, v2, vcc -; SI-GISEL-NEXT: v_add_f32_e32 v1, v1, v3 -; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x1f800000 -; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 -; SI-GISEL-NEXT: v_cndmask_b32_e32 v4, 1.0, v3, vcc -; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 -; SI-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc ; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1 -; SI-GISEL-NEXT: v_add_f32_e32 v0, v0, v2 +; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 ; SI-GISEL-NEXT: v_exp_f32_e32 v2, v0 -; SI-GISEL-NEXT: v_mul_f32_e32 v0, v1, v4 -; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v3, vcc -; SI-GISEL-NEXT: v_mul_f32_e32 v1, v2, v1 -; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 -; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1 +; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v1 +; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v2 ; SI-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_exp_fneg_v2f16: @@ -3540,34 +7376,30 @@ ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1 -; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2fc0000 -; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x42800000 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1 -; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 -; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 -; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1 -; SI-SDAG-NEXT: v_cndmask_b32_e32 v3, 0, v2, vcc -; SI-SDAG-NEXT: v_cmp_gt_f32_e64 s[4:5], s4, v1 -; SI-SDAG-NEXT: v_add_f32_e32 v0, v0, v3 -; SI-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, v2, s[4:5] +; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8a000, v0 +; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8a000, v1 ; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0 -; SI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 ; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1 -; 
SI-SDAG-NEXT: v_mov_b32_e32 v3, 0x1f800000 -; SI-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v3, vcc -; SI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2 -; SI-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, v3, s[4:5] -; SI-SDAG-NEXT: v_mul_f32_e32 v1, v1, v2 +; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 +; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1 +; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; SI-GISEL-LABEL: v_exp_v2f16_fast: ; SI-GISEL: ; %bb.0: ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v2, 0x3dc5 +; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1 +; SI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v2 +; SI-GISEL-NEXT: v_mul_f32_e32 v1, v1, v2 +; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 +; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1 +; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 ; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1 -; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 -; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1 ; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0 ; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1 ; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 @@ -3591,41 +7423,38 @@ ; VI-LABEL: v_exp_v3f16: ; VI: ; %bb.0: ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; VI-NEXT: v_mov_b32_e32 v3, 0x3dc5 -; VI-NEXT: v_mul_f16_e32 v2, 0x3dc5, v0 -; VI-NEXT: v_mul_f16_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; VI-NEXT: v_exp_f16_e32 v2, v2 -; VI-NEXT: v_exp_f16_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD -; VI-NEXT: v_mul_f16_e32 v1, 0x3dc5, v1 -; VI-NEXT: v_exp_f16_e32 v1, v1 +; VI-NEXT: v_cvt_f32_f16_e32 v2, v0 +; VI-NEXT: v_cvt_f32_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; VI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; VI-NEXT: v_mul_f32_e32 v2, 0x3fb8aa3b, v2 +; VI-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 +; VI-NEXT: v_exp_f32_e32 v2, v2 +; VI-NEXT: v_exp_f32_e32 v0, v0 +; VI-NEXT: 
v_mul_f32_e32 v1, 0x3fb8aa3b, v1 +; VI-NEXT: v_exp_f32_e32 v1, v1 +; VI-NEXT: v_cvt_f16_f32_e32 v2, v2 +; VI-NEXT: v_cvt_f16_f32_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD +; VI-NEXT: v_cvt_f16_f32_e32 v1, v1 ; VI-NEXT: v_or_b32_e32 v0, v2, v0 ; VI-NEXT: s_setpc_b64 s[30:31] ; -; GFX900-SDAG-LABEL: v_exp_v3f16: -; GFX900-SDAG: ; %bb.0: -; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX900-SDAG-NEXT: s_movk_i32 s4, 0x3dc5 -; GFX900-SDAG-NEXT: v_mul_f16_e32 v2, 0x3dc5, v0 -; GFX900-SDAG-NEXT: v_mul_f16_sdwa v0, v0, s4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX900-SDAG-NEXT: v_exp_f16_e32 v2, v2 -; GFX900-SDAG-NEXT: v_exp_f16_e32 v0, v0 -; GFX900-SDAG-NEXT: v_mul_f16_e32 v1, 0x3dc5, v1 -; GFX900-SDAG-NEXT: v_exp_f16_e32 v1, v1 -; GFX900-SDAG-NEXT: v_pack_b32_f16 v0, v2, v0 -; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GFX900-GISEL-LABEL: v_exp_v3f16: -; GFX900-GISEL: ; %bb.0: -; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX900-GISEL-NEXT: s_movk_i32 s4, 0x3dc5 -; GFX900-GISEL-NEXT: v_mul_f16_e32 v2, 0x3dc5, v0 -; GFX900-GISEL-NEXT: v_mul_f16_sdwa v0, v0, s4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX900-GISEL-NEXT: v_exp_f16_e32 v2, v2 -; GFX900-GISEL-NEXT: v_exp_f16_e32 v0, v0 -; GFX900-GISEL-NEXT: v_mul_f16_e32 v1, 0x3dc5, v1 -; GFX900-GISEL-NEXT: v_exp_f16_e32 v1, v1 -; GFX900-GISEL-NEXT: v_lshl_or_b32 v0, v0, 16, v2 -; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: v_exp_v3f16: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: v_cvt_f32_f16_e32 v2, v0 +; GFX900-NEXT: v_cvt_f32_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; GFX900-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX900-NEXT: v_mul_f32_e32 v2, 0x3fb8aa3b, v2 +; GFX900-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 +; GFX900-NEXT: v_exp_f32_e32 v2, v2 +; GFX900-NEXT: v_exp_f32_e32 v0, v0 +; GFX900-NEXT: v_mul_f32_e32 v1, 
0x3fb8aa3b, v1 +; GFX900-NEXT: v_exp_f32_e32 v1, v1 +; GFX900-NEXT: v_cvt_f16_f32_e32 v2, v2 +; GFX900-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX900-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX900-NEXT: v_pack_b32_f16 v0, v2, v0 +; GFX900-NEXT: s_setpc_b64 s[30:31] ; ; SI-SDAG-LABEL: v_exp_v3f16: ; SI-SDAG: ; %bb.0: @@ -3633,67 +7462,36 @@ ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v2 -; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2fc0000 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 -; SI-SDAG-NEXT: v_mov_b32_e32 v3, 0x42800000 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v2, v2 ; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 -; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 -; SI-SDAG-NEXT: v_cndmask_b32_e32 v4, 0, v3, vcc -; SI-SDAG-NEXT: v_add_f32_e32 v0, v0, v4 -; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0 -; SI-SDAG-NEXT: v_mov_b32_e32 v4, 0x1f800000 ; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1 -; SI-SDAG-NEXT: v_cndmask_b32_e32 v5, 1.0, v4, vcc -; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v1 ; SI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3fb8aa3b, v2 -; SI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v5 -; SI-SDAG-NEXT: v_cndmask_b32_e32 v5, 0, v3, vcc -; SI-SDAG-NEXT: v_cmp_gt_f32_e64 s[4:5], s4, v2 -; SI-SDAG-NEXT: v_add_f32_e32 v1, v1, v5 -; SI-SDAG-NEXT: v_cndmask_b32_e64 v3, 0, v3, s[4:5] +; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0 ; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1 -; SI-SDAG-NEXT: v_add_f32_e32 v2, v2, v3 ; SI-SDAG-NEXT: v_exp_f32_e32 v2, v2 -; SI-SDAG-NEXT: v_cndmask_b32_e32 v3, 1.0, v4, vcc -; SI-SDAG-NEXT: v_mul_f32_e32 v1, v1, v3 -; SI-SDAG-NEXT: v_cndmask_b32_e64 v3, 1.0, v4, s[4:5] -; SI-SDAG-NEXT: v_mul_f32_e32 v2, v2, v3 +; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 +; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1 +; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v2 +; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1 +; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v2, v2 ; SI-SDAG-NEXT: s_setpc_b64 
s[30:31] ; ; SI-GISEL-LABEL: v_exp_v3f16: ; SI-GISEL: ; %bb.0: ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 -; SI-GISEL-NEXT: s_mov_b32 s4, 0xc2fc0000 -; SI-GISEL-NEXT: v_mov_b32_e32 v4, 0x42800000 ; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1 -; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 -; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 -; SI-GISEL-NEXT: v_cndmask_b32_e32 v5, 0, v4, vcc -; SI-GISEL-NEXT: v_add_f32_e32 v0, v0, v5 -; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0 ; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v2, v2 -; SI-GISEL-NEXT: v_mov_b32_e32 v5, 0x1f800000 +; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 ; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1 -; SI-GISEL-NEXT: v_cndmask_b32_e32 v6, 1.0, v5, vcc -; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s4, v1 -; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0xc2fc0000 -; SI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v6 -; SI-GISEL-NEXT: v_cndmask_b32_e32 v6, 0, v4, vcc ; SI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8aa3b, v2 -; SI-GISEL-NEXT: v_add_f32_e32 v1, v1, v6 -; SI-GISEL-NEXT: v_cndmask_b32_e32 v6, 1.0, v5, vcc -; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v2, v3 -; SI-GISEL-NEXT: v_cndmask_b32_e32 v3, 0, v4, vcc -; SI-GISEL-NEXT: v_add_f32_e32 v2, v2, v3 +; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0 ; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1 ; SI-GISEL-NEXT: v_exp_f32_e32 v2, v2 -; SI-GISEL-NEXT: v_cndmask_b32_e32 v3, 1.0, v5, vcc ; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 -; SI-GISEL-NEXT: v_mul_f32_e32 v1, v1, v6 -; SI-GISEL-NEXT: v_mul_f32_e32 v2, v2, v3 ; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1 ; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v2, v2 ; SI-GISEL-NEXT: s_setpc_b64 s[30:31] @@ -3712,29 +7510,52 @@ } define <3 x half> @v_exp_v3f16_afn(<3 x half> %in) { -; VI-LABEL: v_exp_v3f16_afn: -; VI: ; %bb.0: -; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; VI-NEXT: v_mov_b32_e32 v3, 0x3dc5 -; VI-NEXT: v_mul_f16_e32 v2, 0x3dc5, v0 -; VI-NEXT: v_mul_f16_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD 
src0_sel:WORD_1 src1_sel:DWORD -; VI-NEXT: v_exp_f16_e32 v2, v2 -; VI-NEXT: v_exp_f16_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD -; VI-NEXT: v_mul_f16_e32 v1, 0x3dc5, v1 -; VI-NEXT: v_exp_f16_e32 v1, v1 -; VI-NEXT: v_or_b32_e32 v0, v2, v0 -; VI-NEXT: s_setpc_b64 s[30:31] +; VI-SDAG-LABEL: v_exp_v3f16_afn: +; VI-SDAG: ; %bb.0: +; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-SDAG-NEXT: v_cvt_f32_f16_e32 v2, v0 +; VI-SDAG-NEXT: v_cvt_f32_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; VI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1 +; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3fb8aa3b, v2 +; VI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 +; VI-SDAG-NEXT: v_exp_f32_e32 v2, v2 +; VI-SDAG-NEXT: v_exp_f32_e32 v0, v0 +; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1 +; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1 +; VI-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v2 +; VI-SDAG-NEXT: v_cvt_f16_f32_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD +; VI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1 +; VI-SDAG-NEXT: v_or_b32_e32 v0, v2, v0 +; VI-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; VI-GISEL-LABEL: v_exp_v3f16_afn: +; VI-GISEL: ; %bb.0: +; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x3dc5 +; VI-GISEL-NEXT: v_mul_f16_e32 v2, 0x3dc5, v0 +; VI-GISEL-NEXT: v_mul_f16_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; VI-GISEL-NEXT: v_exp_f16_e32 v2, v2 +; VI-GISEL-NEXT: v_exp_f16_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD +; VI-GISEL-NEXT: v_mul_f16_e32 v1, 0x3dc5, v1 +; VI-GISEL-NEXT: v_exp_f16_e32 v1, v1 +; VI-GISEL-NEXT: v_or_b32_e32 v0, v2, v0 +; VI-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX900-SDAG-LABEL: v_exp_v3f16_afn: ; GFX900-SDAG: ; %bb.0: ; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX900-SDAG-NEXT: s_movk_i32 s4, 0x3dc5 -; GFX900-SDAG-NEXT: v_mul_f16_e32 v2, 0x3dc5, v0 -; GFX900-SDAG-NEXT: v_mul_f16_sdwa v0, v0, s4 
dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX900-SDAG-NEXT: v_exp_f16_e32 v2, v2 -; GFX900-SDAG-NEXT: v_exp_f16_e32 v0, v0 -; GFX900-SDAG-NEXT: v_mul_f16_e32 v1, 0x3dc5, v1 -; GFX900-SDAG-NEXT: v_exp_f16_e32 v1, v1 +; GFX900-SDAG-NEXT: v_cvt_f32_f16_e32 v2, v0 +; GFX900-SDAG-NEXT: v_cvt_f32_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; GFX900-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX900-SDAG-NEXT: v_mul_f32_e32 v2, 0x3fb8aa3b, v2 +; GFX900-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 +; GFX900-SDAG-NEXT: v_exp_f32_e32 v2, v2 +; GFX900-SDAG-NEXT: v_exp_f32_e32 v0, v0 +; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1 +; GFX900-SDAG-NEXT: v_exp_f32_e32 v1, v1 +; GFX900-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v2 +; GFX900-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX900-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1 ; GFX900-SDAG-NEXT: v_pack_b32_f16 v0, v2, v0 ; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] ; @@ -3757,44 +7578,39 @@ ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v2 -; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2fc0000 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 -; SI-SDAG-NEXT: v_mov_b32_e32 v3, 0x42800000 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v2, v2 -; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 -; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 -; SI-SDAG-NEXT: v_cndmask_b32_e32 v4, 0, v3, vcc -; SI-SDAG-NEXT: v_add_f32_e32 v0, v0, v4 +; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8a000, v0 +; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8a000, v1 +; SI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v2 ; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0 -; SI-SDAG-NEXT: v_mov_b32_e32 v4, 0x1f800000 -; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1 -; SI-SDAG-NEXT: v_cndmask_b32_e32 v5, 1.0, v4, vcc -; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v1 -; SI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3fb8aa3b, v2 -; SI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v5 -; SI-SDAG-NEXT: 
v_cndmask_b32_e32 v5, 0, v3, vcc -; SI-SDAG-NEXT: v_cmp_gt_f32_e64 s[4:5], s4, v2 -; SI-SDAG-NEXT: v_add_f32_e32 v1, v1, v5 -; SI-SDAG-NEXT: v_cndmask_b32_e64 v3, 0, v3, s[4:5] ; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1 -; SI-SDAG-NEXT: v_add_f32_e32 v2, v2, v3 ; SI-SDAG-NEXT: v_exp_f32_e32 v2, v2 -; SI-SDAG-NEXT: v_cndmask_b32_e32 v3, 1.0, v4, vcc -; SI-SDAG-NEXT: v_mul_f32_e32 v1, v1, v3 -; SI-SDAG-NEXT: v_cndmask_b32_e64 v3, 1.0, v4, s[4:5] -; SI-SDAG-NEXT: v_mul_f32_e32 v2, v2, v3 +; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 +; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1 +; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v2 +; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1 +; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v2, v2 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; SI-GISEL-LABEL: v_exp_v3f16_afn: ; SI-GISEL: ; %bb.0: ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v3, 0x3dc5 +; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1 +; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v2, v2 +; SI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v3 +; SI-GISEL-NEXT: v_mul_f32_e32 v1, v1, v3 +; SI-GISEL-NEXT: v_mul_f32_e32 v2, v2, v3 +; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 +; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1 +; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v2, v2 +; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 ; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1 ; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v2, v2 -; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 -; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1 -; SI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8aa3b, v2 ; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0 ; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1 ; SI-GISEL-NEXT: v_exp_f32_e32 v2, v2 @@ -3830,5 +7646,3 @@ attributes #0 = { "denormal-fp-math-f32"="ieee,preserve-sign" } attributes #1 = { "denormal-fp-math-f32"="dynamic,dynamic" } attributes #2 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } -;; NOTE: These prefixes are unused and the 
list is autogenerated. Do not add tests below this line: -; GFX900: {{.*}}