Index: llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp +++ llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp @@ -99,7 +99,7 @@ Module *Mod = nullptr; const DataLayout *DL = nullptr; bool HasUnsafeFPMath = false; - bool HasFP32Denormals = false; + bool HasFP32DenormalFlush = false; bool FlowChanged = false; DenseMap BreakPhiNodesCache; @@ -794,8 +794,8 @@ // // NOTE: optimizeWithRcp should be tried first because rcp is the preference. static Value *optimizeWithFDivFast(Value *Num, Value *Den, float ReqdAccuracy, - bool HasDenormals, IRBuilder<> &Builder, - Module *Mod) { + bool HasFP32DenormalFlush, + IRBuilder<> &Builder, Module *Mod) { // fdiv.fast can achieve 2.5 ULP accuracy. if (ReqdAccuracy < 2.5f) return nullptr; @@ -812,7 +812,7 @@ } // fdiv does not support denormals. But 1.0/x is always fine to use it. - if (HasDenormals && !NumIsOne) + if (!HasFP32DenormalFlush && !NumIsOne) return nullptr; Function *Decl = Intrinsic::getDeclaration(Mod, Intrinsic::amdgcn_fdiv_fast); @@ -857,8 +857,9 @@ // rcp_f16 is accurate for !fpmath >= 1.0ulp. // rcp_f32 is accurate for !fpmath >= 1.0ulp and denormals are flushed. // rcp_f64 is never accurate. - const bool RcpIsAccurate = (Ty->isHalfTy() && ReqdAccuracy >= 1.0f) || - (Ty->isFloatTy() && !HasFP32Denormals && ReqdAccuracy >= 1.0f); + const bool RcpIsAccurate = + (Ty->isHalfTy() && ReqdAccuracy >= 1.0f) || + (Ty->isFloatTy() && HasFP32DenormalFlush && ReqdAccuracy >= 1.0f); IRBuilder<> Builder(FDiv.getParent(), std::next(FDiv.getIterator())); Builder.setFastMathFlags(FMF); @@ -880,8 +881,8 @@ Value *NewElt = optimizeWithRcp(NumEltI, DenEltI, AllowInaccurateRcp, RcpIsAccurate, Builder, Mod); if (!NewElt) // Try fdiv.fast. - NewElt = optimizeWithFDivFast(NumEltI, DenEltI, ReqdAccuracy, - HasFP32Denormals, Builder, Mod); + NewElt = optimizeWithFDivFast(NumEltI, DenEltI, ReqdAccuracy, + HasFP32DenormalFlush, Builder, Mod); if (!NewElt) // Keep the original. NewElt = Builder.CreateFDiv(NumEltI, DenEltI); @@ -892,8 +893,8 @@ NewFDiv = optimizeWithRcp(Num, Den, AllowInaccurateRcp, RcpIsAccurate, Builder, Mod); if (!NewFDiv) { // Try fdiv.fast. - NewFDiv = optimizeWithFDivFast(Num, Den, ReqdAccuracy, HasFP32Denormals, - Builder, Mod); + NewFDiv = optimizeWithFDivFast(Num, Den, ReqdAccuracy, + HasFP32DenormalFlush, Builder, Mod); } } @@ -1864,7 +1865,8 @@ Impl.DT = DTWP ? &DTWP->getDomTree() : nullptr; Impl.HasUnsafeFPMath = hasUnsafeFPMath(F); SIModeRegisterDefaults Mode(F); - Impl.HasFP32Denormals = Mode.allFP32Denormals(); + Impl.HasFP32DenormalFlush = + Mode.FP32Denormals == DenormalMode::getPreserveSign(); return Impl.run(F); } @@ -1880,7 +1882,8 @@ Impl.DT = FAM.getCachedResult(F); Impl.HasUnsafeFPMath = hasUnsafeFPMath(F); SIModeRegisterDefaults Mode(F); - Impl.HasFP32Denormals = Mode.allFP32Denormals(); + Impl.HasFP32DenormalFlush = + Mode.FP32Denormals == DenormalMode::getPreserveSign(); PreservedAnalyses PA = PreservedAnalyses::none(); if (!Impl.FlowChanged) PA.preserveSet(); Index: llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -1872,7 +1872,8 @@ bool UseFmadFtz = false; if (Subtarget->isGCN()) { const SIMachineFunctionInfo *MFI = MF.getInfo(); - UseFmadFtz = MFI->getMode().allFP32Denormals(); + UseFmadFtz = + MFI->getMode().FP32Denormals != DenormalMode::getPreserveSign(); } // float fr = mad(fqneg, fb, fa); @@ -1964,11 +1965,11 @@ const SIMachineFunctionInfo *MFI = MF.getInfo(); // Compute denominator reciprocal. - unsigned FMAD = !Subtarget->hasMadMacF32Insts() ? - (unsigned)ISD::FMA : - !MFI->getMode().allFP32Denormals() ? - (unsigned)ISD::FMAD : - (unsigned)AMDGPUISD::FMAD_FTZ; + unsigned FMAD = + !Subtarget->hasMadMacF32Insts() ? (unsigned)ISD::FMA + : MFI->getMode().FP32Denormals == DenormalMode::getPreserveSign() + ? (unsigned)ISD::FMAD + : (unsigned)AMDGPUISD::FMAD_FTZ; SDValue Cvt_Lo = DAG.getNode(ISD::UINT_TO_FP, DL, MVT::f32, RHS_Lo); SDValue Cvt_Hi = DAG.getNode(ISD::UINT_TO_FP, DL, MVT::f32, RHS_Hi); Index: llvm/lib/Target/AMDGPU/AMDGPUInstructions.td =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPUInstructions.td +++ llvm/lib/Target/AMDGPU/AMDGPUInstructions.td @@ -110,12 +110,12 @@ class AMDGPUPatIgnoreCopies : AMDGPUPat; let RecomputePerFunction = 1 in { -def FP16Denormals : Predicate<"MF->getInfo()->getMode().allFP64FP16Denormals()">; -def FP32Denormals : Predicate<"MF->getInfo()->getMode().allFP32Denormals()">; -def FP64Denormals : Predicate<"MF->getInfo()->getMode().allFP64FP16Denormals()">; -def NoFP16Denormals : Predicate<"!MF->getInfo()->getMode().allFP64FP16Denormals()">; -def NoFP32Denormals : Predicate<"!MF->getInfo()->getMode().allFP32Denormals()">; -def NoFP64Denormals : Predicate<"!MF->getInfo()->getMode().allFP64FP16Denormals()">; +def FP16Denormals : Predicate<"MF->getInfo()->getMode().FP64FP16Denormals != DenormalMode::getPreserveSign()">; +def FP32Denormals : Predicate<"MF->getInfo()->getMode().FP32Denormals != DenormalMode::getPreserveSign()">; +def FP64Denormals : Predicate<"MF->getInfo()->getMode().FP64FP16Denormals != DenormalMode::getPreserveSign()">; +def NoFP16Denormals : Predicate<"MF->getInfo()->getMode().FP64FP16Denormals == DenormalMode::getPreserveSign()">; +def NoFP32Denormals : Predicate<"MF->getInfo()->getMode().FP32Denormals == DenormalMode::getPreserveSign()">; +def NoFP64Denormals : Predicate<"MF->getInfo()->getMode().FP64FP16Denormals == DenormalMode::getPreserveSign()">; def UnsafeFPMath : Predicate<"TM.Options.UnsafeFPMath">; } Index: llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -2951,9 +2951,11 @@ // TODO: Always legal with future ftz flag. // FIXME: Do we need just output? - if (Ty == LLT::scalar(32) && !MFI->getMode().allFP32Denormals()) + if (Ty == LLT::scalar(32) && + MFI->getMode().FP32Denormals == DenormalMode::getPreserveSign()) return true; - if (Ty == LLT::scalar(16) && !MFI->getMode().allFP64FP16Denormals()) + if (Ty == LLT::scalar(16) && + MFI->getMode().FP64FP16Denormals == DenormalMode::getPreserveSign()) return true; MachineIRBuilder HelperBuilder(MI); @@ -4613,7 +4615,7 @@ // FIXME: Doesn't correctly model the FP mode switch, and the FP operations // aren't modeled as reading it. - if (!Mode.allFP32Denormals()) + if (Mode.FP32Denormals != DenormalMode::getIEEE()) toggleSPDenormMode(true, B, ST, Mode); auto Fma0 = B.buildFMA(S32, NegDivScale0, ApproxRcp, One, Flags); @@ -4623,7 +4625,9 @@ auto Fma3 = B.buildFMA(S32, Fma2, Fma1, Mul, Flags); auto Fma4 = B.buildFMA(S32, NegDivScale0, Fma3, NumeratorScaled, Flags); - if (!Mode.allFP32Denormals()) + // FIXME: This mishandles dynamic denormal mode. We need to query the + // current mode and restore the original. + if (Mode.FP32Denormals != DenormalMode::getIEEE()) toggleSPDenormMode(false, B, ST, Mode); auto Fmas = B.buildIntrinsic(Intrinsic::amdgcn_div_fmas, {S32}, false) Index: llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp +++ llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp @@ -297,8 +297,9 @@ TLI(ST->getTargetLowering()), CommonTTI(TM, F), IsGraphics(AMDGPU::isGraphics(F.getCallingConv())) { SIModeRegisterDefaults Mode(F); - HasFP32Denormals = Mode.allFP32Denormals(); - HasFP64FP16Denormals = Mode.allFP64FP16Denormals(); + HasFP32Denormals = Mode.FP32Denormals != DenormalMode::getPreserveSign(); + HasFP64FP16Denormals = + Mode.FP64FP16Denormals != DenormalMode::getPreserveSign(); } unsigned GCNTTIImpl::getNumberOfRegisters(unsigned RCID) const { Index: llvm/lib/Target/AMDGPU/SIISelLowering.cpp =================================================================== --- llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -57,14 +57,14 @@ cl::desc("Use indirect register addressing for divergent indexes"), cl::init(false)); -static bool hasFP32Denormals(const MachineFunction &MF) { +static bool denormalModeIsFlushAllF32(const MachineFunction &MF) { const SIMachineFunctionInfo *Info = MF.getInfo(); - return Info->getMode().allFP32Denormals(); + return Info->getMode().FP32Denormals == DenormalMode::getPreserveSign(); } -static bool hasFP64FP16Denormals(const MachineFunction &MF) { +static bool denormalModeIsFlushAllF64F16(const MachineFunction &MF) { const SIMachineFunctionInfo *Info = MF.getInfo(); - return Info->getMode().allFP64FP16Denormals(); + return Info->getMode().FP64FP16Denormals == DenormalMode::getPreserveSign(); } static unsigned findFirstFreeSGPR(CCState &CCInfo) { @@ -829,10 +829,10 @@ EVT DestVT, EVT SrcVT) const { return ((Opcode == ISD::FMAD && Subtarget->hasMadMixInsts()) || (Opcode == ISD::FMA && Subtarget->hasFmaMixInsts())) && - DestVT.getScalarType() == MVT::f32 && - SrcVT.getScalarType() == MVT::f16 && - // TODO: This probably only requires no input flushing? - !hasFP32Denormals(DAG.getMachineFunction()); + DestVT.getScalarType() == MVT::f32 && + SrcVT.getScalarType() == MVT::f16 && + // TODO: This probably only requires no input flushing? + denormalModeIsFlushAllF32(DAG.getMachineFunction()); } bool SITargetLowering::isFPExtFoldable(const MachineInstr &MI, unsigned Opcode, @@ -842,7 +842,7 @@ DestTy.getScalarSizeInBits() == 32 && SrcTy.getScalarSizeInBits() == 16 && // TODO: This probably only requires no input flushing? - !hasFP32Denormals(*MI.getMF()); + denormalModeIsFlushAllF32(*MI.getMF()); } bool SITargetLowering::isShuffleMaskLegal(ArrayRef, EVT) const { @@ -4664,7 +4664,7 @@ // Otherwise f32 mad is always full rate and returns the same result as // the separate operations so should be preferred over fma. // However does not support denormals. - if (hasFP32Denormals(MF)) + if (!denormalModeIsFlushAllF32(MF)) return Subtarget->hasFastFMAF32() || Subtarget->hasDLInsts(); // If the subtarget has v_fmac_f32, that's just as good as v_mac_f32. @@ -4673,7 +4673,7 @@ case MVT::f64: return true; case MVT::f16: - return Subtarget->has16BitInsts() && hasFP64FP16Denormals(MF); + return Subtarget->has16BitInsts() && !denormalModeIsFlushAllF64F16(MF); default: break; } @@ -4702,9 +4702,10 @@ return false; if (Ty.getScalarSizeInBits() == 16) - return Subtarget->hasMadF16() && !hasFP64FP16Denormals(*MI.getMF()); + return Subtarget->hasMadF16() && denormalModeIsFlushAllF64F16(*MI.getMF()); if (Ty.getScalarSizeInBits() == 32) - return Subtarget->hasMadMacF32Insts() && !hasFP32Denormals(*MI.getMF()); + return Subtarget->hasMadMacF32Insts() && + denormalModeIsFlushAllF32(*MI.getMF()); return false; } @@ -4716,10 +4717,10 @@ EVT VT = N->getValueType(0); if (VT == MVT::f32) return Subtarget->hasMadMacF32Insts() && - !hasFP32Denormals(DAG.getMachineFunction()); + denormalModeIsFlushAllF32(DAG.getMachineFunction()); if (VT == MVT::f16) { return Subtarget->hasMadF16() && - !hasFP64FP16Denormals(DAG.getMachineFunction()); + denormalModeIsFlushAllF64F16(DAG.getMachineFunction()); } return false; @@ -9324,15 +9325,13 @@ // Returns immediate value for setting the F32 denorm mode when using the // S_DENORM_MODE instruction. -static SDValue getSPDenormModeValue(int SPDenormMode, SelectionDAG &DAG, - const SDLoc &SL, const GCNSubtarget *ST) { +static SDValue getSPDenormModeValue(uint32_t SPDenormMode, SelectionDAG &DAG, + const SIMachineFunctionInfo *Info, + const GCNSubtarget *ST) { assert(ST->hasDenormModeInst() && "Requires S_DENORM_MODE"); - int DPDenormModeDefault = hasFP64FP16Denormals(DAG.getMachineFunction()) - ? FP_DENORM_FLUSH_NONE - : FP_DENORM_FLUSH_IN_FLUSH_OUT; - - int Mode = SPDenormMode | (DPDenormModeDefault << 2); - return DAG.getTargetConstant(Mode, SL, MVT::i32); + uint32_t DPDenormModeDefault = Info->getMode().fpDenormModeDPValue(); + uint32_t Mode = SPDenormMode | (DPDenormModeDefault << 2); + return DAG.getTargetConstant(Mode, SDLoc(), MVT::i32); } SDValue SITargetLowering::LowerFDIV32(SDValue Op, SelectionDAG &DAG) const { @@ -9370,7 +9369,11 @@ (1 << AMDGPU::Hwreg::WIDTH_M1_SHIFT_); const SDValue BitField = DAG.getTargetConstant(Denorm32Reg, SL, MVT::i32); - const bool HasFP32Denormals = hasFP32Denormals(DAG.getMachineFunction()); + const MachineFunction &MF = DAG.getMachineFunction(); + const SIMachineFunctionInfo *Info = MF.getInfo(); + const DenormalMode DenormMode = Info->getMode().FP32Denormals; + + const bool HasFP32Denormals = DenormMode == DenormalMode::getIEEE(); if (!HasFP32Denormals) { // Note we can't use the STRICT_FMA/STRICT_FMUL for the non-strict FDIV @@ -9382,7 +9385,7 @@ SDNode *EnableDenorm; if (Subtarget->hasDenormModeInst()) { const SDValue EnableDenormValue = - getSPDenormModeValue(FP_DENORM_FLUSH_NONE, DAG, SL, Subtarget); + getSPDenormModeValue(FP_DENORM_FLUSH_NONE, DAG, Info, Subtarget); EnableDenorm = DAG.getNode(AMDGPUISD::DENORM_MODE, SL, BindParamVTs, DAG.getEntryNode(), EnableDenormValue).getNode(); @@ -9422,10 +9425,13 @@ NumeratorScaled, Fma3, Flags); if (!HasFP32Denormals) { + // FIXME: This mishandles dynamic denormal mode. We need to query the + // current mode and restore the original. + SDNode *DisableDenorm; if (Subtarget->hasDenormModeInst()) { - const SDValue DisableDenormValue = - getSPDenormModeValue(FP_DENORM_FLUSH_IN_FLUSH_OUT, DAG, SL, Subtarget); + const SDValue DisableDenormValue = getSPDenormModeValue( + FP_DENORM_FLUSH_IN_FLUSH_OUT, DAG, Info, Subtarget); DisableDenorm = DAG.getNode(AMDGPUISD::DENORM_MODE, SL, MVT::Other, Fma4.getValue(1), DisableDenormValue, @@ -11360,10 +11366,11 @@ // Only do this if we are not trying to support denormals. v_mad_f32 does not // support denormals ever. - if (((VT == MVT::f32 && !hasFP32Denormals(DAG.getMachineFunction())) || - (VT == MVT::f16 && !hasFP64FP16Denormals(DAG.getMachineFunction()) && - getSubtarget()->hasMadF16())) && - isOperationLegal(ISD::FMAD, VT)) + if (((VT == MVT::f32 && + denormalModeIsFlushAllF32(DAG.getMachineFunction())) || + (VT == MVT::f16 && Subtarget->hasMadF16() && + denormalModeIsFlushAllF64F16(DAG.getMachineFunction()))) && + isOperationLegal(ISD::FMAD, VT)) return ISD::FMAD; const TargetOptions &Options = DAG.getTarget().Options; @@ -13312,10 +13319,10 @@ EVT VT) const { switch (VT.getScalarType().getSimpleVT().SimpleTy) { case MVT::f32: - return hasFP32Denormals(DAG.getMachineFunction()); + return !denormalModeIsFlushAllF32(DAG.getMachineFunction()); case MVT::f64: case MVT::f16: - return hasFP64FP16Denormals(DAG.getMachineFunction()); + return !denormalModeIsFlushAllF64F16(DAG.getMachineFunction()); default: return false; } @@ -13325,10 +13332,10 @@ MachineFunction &MF) const { switch (Ty.getScalarSizeInBits()) { case 32: - return hasFP32Denormals(MF); + return !denormalModeIsFlushAllF32(MF); case 64: case 16: - return hasFP64FP16Denormals(MF); + return !denormalModeIsFlushAllF64F16(MF); default: return false; } Index: llvm/lib/Target/AMDGPU/SIModeRegisterDefaults.h =================================================================== --- llvm/lib/Target/AMDGPU/SIModeRegisterDefaults.h +++ llvm/lib/Target/AMDGPU/SIModeRegisterDefaults.h @@ -54,14 +54,6 @@ FP64FP16Denormals == Other.FP64FP16Denormals; } - bool allFP32Denormals() const { - return FP32Denormals == DenormalMode::getIEEE(); - } - - bool allFP64FP16Denormals() const { - return FP64FP16Denormals == DenormalMode::getIEEE(); - } - /// Get the encoding value for the FP_DENORM bits of the mode register for the /// FP32 denormal mode. uint32_t fpDenormModeSPValue() const { Index: llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-unmerge-values.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-unmerge-values.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-unmerge-values.mir @@ -34,6 +34,7 @@ machineFunctionInfo: mode: fp32-input-denormals: false + fp32-output-denormals: false body: | bb.1: liveins: $vgpr0, $vgpr1, $vgpr2_vgpr3 @@ -63,6 +64,7 @@ machineFunctionInfo: mode: fp32-input-denormals: false + fp32-output-denormals: false body: | bb.1: liveins: $sgpr0, $sgpr1, $vgpr0_vgpr1 @@ -99,6 +101,7 @@ machineFunctionInfo: mode: fp32-input-denormals: false + fp32-output-denormals: false body: | bb.1: liveins: $sgpr0, $sgpr1, $vgpr0_vgpr1 @@ -199,6 +202,7 @@ machineFunctionInfo: mode: fp32-input-denormals: false + fp32-output-denormals: false body: | bb.1: liveins: $vgpr0, $vgpr1, $vgpr2_vgpr3, $vgpr4, $vgpr5 @@ -241,6 +245,7 @@ machineFunctionInfo: mode: fp32-input-denormals: false + fp32-output-denormals: false body: | bb.1: liveins: $vgpr0, $vgpr1, $vgpr2_vgpr3, $vgpr4, $vgpr5 @@ -290,6 +295,7 @@ machineFunctionInfo: mode: fp32-input-denormals: false + fp32-output-denormals: false body: | bb.1: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 @@ -332,6 +338,7 @@ machineFunctionInfo: mode: fp32-input-denormals: false + fp32-output-denormals: false body: | bb.1: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 @@ -381,6 +388,7 @@ machineFunctionInfo: mode: fp32-input-denormals: false + fp32-output-denormals: false body: | bb.1: liveins: $vgpr0, $vgpr1, $vgpr2_vgpr3 @@ -411,6 +419,7 @@ machineFunctionInfo: mode: fp32-input-denormals: false + fp32-output-denormals: false body: | bb.1: liveins: $vgpr0, $vgpr1, $vgpr2_vgpr3 Index: llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-fdiv.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-fdiv.ll +++ llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-fdiv.ll @@ -4,7 +4,7 @@ ; NOOP-LABEL: @noop_fdiv_fpmath( ; NOOP: %md.25ulp = fdiv float %a, %b, !fpmath !0 -define amdgpu_kernel void @noop_fdiv_fpmath(ptr addrspace(1) %out, float %a, float %b) #3 { +define amdgpu_kernel void @noop_fdiv_fpmath(ptr addrspace(1) %out, float %a, float %b) { %md.25ulp = fdiv float %a, %b, !fpmath !0 store volatile float %md.25ulp, ptr addrspace(1) %out ret void @@ -337,9 +337,24 @@ ret void } +; CHECK-LABEL: @rcp_fpmath_dynamic_denorm( +; CHECK: %md.25ulp = fdiv float 1.000000e+00, %x, !fpmath !2 +define float @rcp_fpmath_dynamic_denorm(float %x) #3 { + %md.25ulp = fdiv float 1.0, %x, !fpmath !2 + ret float %md.25ulp +} + +; CHECK-LABEL: @rcp_dynamic_denorm( +; CHECK: %md.25ulp = fdiv float 1.000000e+00, %x +define float @rcp_dynamic_denorm(float %x) #3 { + %md.25ulp = fdiv float 1.0, %x + ret float %md.25ulp +} + attributes #0 = { nounwind optnone noinline } attributes #1 = { nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" } attributes #2 = { nounwind "denormal-fp-math-f32"="ieee,ieee" } +attributes #3 = { nounwind "denormal-fp-math-f32"="dynamic,dynamic" } !0 = !{float 2.500000e+00} !1 = !{float 5.000000e-01} Index: llvm/test/CodeGen/AMDGPU/fdiv.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/fdiv.ll +++ llvm/test/CodeGen/AMDGPU/fdiv.ll @@ -350,8 +350,20 @@ ret void } +; FUNC-LABEL: {{^}}v_fdiv_f32_dynamic_denorm: +; PREGFX10: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3 +; GFX10: s_denorm_mode 15 + +; PREGFX10: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0 +; GFX10: s_denorm_mode 12 +define float @v_fdiv_f32_dynamic_denorm(float %a, float %b) #3 { + %fdiv = fdiv float %a, %b + ret float %fdiv +} + attributes #0 = { nounwind "enable-unsafe-fp-math"="false" "denormal-fp-math-f32"="preserve-sign,preserve-sign" "target-features"="-flat-for-global" } attributes #1 = { nounwind "enable-unsafe-fp-math"="true" "denormal-fp-math-f32"="preserve-sign,preserve-sign" "target-features"="-flat-for-global" } attributes #2 = { nounwind "enable-unsafe-fp-math"="false" "denormal-fp-math-f32"="ieee,ieee" "target-features"="-flat-for-global" } +attributes #3 = { nounwind "denormal-fp-math-f32"="dynamic,dynamic" "target-features"="-flat-for-global" } !0 = !{float 2.500000e+00} Index: llvm/test/CodeGen/AMDGPU/llvm.exp.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/llvm.exp.ll +++ llvm/test/CodeGen/AMDGPU/llvm.exp.ll @@ -3989,14 +3989,15 @@ ; VI-SDAG-NEXT: v_sub_f32_e32 v4, v0, v1 ; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v1 ; VI-SDAG-NEXT: v_mul_f32_e32 v5, 0x39a3b295, v4 -; VI-SDAG-NEXT: s_mov_b32 s4, 0x3fb8a000 -; VI-SDAG-NEXT: v_rndne_f32_e32 v2, v2 -; VI-SDAG-NEXT: v_mac_f32_e32 v5, 0x3fb8a000, v4 -; VI-SDAG-NEXT: v_mad_f32 v3, v1, s4, -v2 -; VI-SDAG-NEXT: v_mac_f32_e32 v5, 0x39a3b295, v1 -; VI-SDAG-NEXT: v_add_f32_e32 v1, v3, v5 +; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3fb8a000, v4 +; VI-SDAG-NEXT: v_rndne_f32_e32 v3, v2 +; VI-SDAG-NEXT: v_add_f32_e32 v4, v4, v5 +; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1 +; VI-SDAG-NEXT: v_sub_f32_e32 v2, v2, v3 +; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v4 +; VI-SDAG-NEXT: v_add_f32_e32 v1, v2, v1 ; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1 -; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2 +; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v3 ; VI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0 ; VI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 ; VI-SDAG-NEXT: s_mov_b32 s4, 0x42b17218 @@ -4012,14 +4013,15 @@ ; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 ; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1 -; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3fb8a000, v1 ; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x39a3b295, v2 -; VI-GISEL-NEXT: s_mov_b32 s4, 0x3fb8a000 -; VI-GISEL-NEXT: v_mac_f32_e32 v4, 0x3fb8a000, v2 +; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v2 +; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3fb8a000, v1 +; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v4 +; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1 +; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 ; VI-GISEL-NEXT: v_rndne_f32_e32 v2, v3 -; VI-GISEL-NEXT: v_mac_f32_e32 v4, 0x39a3b295, v1 -; VI-GISEL-NEXT: v_mad_f32 v1, v1, s4, -v2 -; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v4 +; VI-GISEL-NEXT: v_sub_f32_e32 v3, v3, v2 +; VI-GISEL-NEXT: v_add_f32_e32 v1, v3, v1 ; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v2 ; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1 ; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 @@ -4035,20 +4037,20 @@ ; GFX900-SDAG-LABEL: v_exp_f32_afn_dynamic: ; GFX900-SDAG: ; %bb.0: ; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b ; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0 -; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1 -; GFX900-SDAG-NEXT: s_mov_b32 s5, 0x32a5705f -; GFX900-SDAG-NEXT: v_rndne_f32_e32 v1, v1 -; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s5, v2 -; GFX900-SDAG-NEXT: v_mad_f32 v3, v0, s4, -v1 -; GFX900-SDAG-NEXT: v_add_f32_e32 v2, v3, v2 -; GFX900-SDAG-NEXT: v_exp_f32_e32 v2, v2 -; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v1, v1 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b +; GFX900-SDAG-NEXT: v_rndne_f32_e32 v2, v1 +; GFX900-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2 +; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x32a5705f +; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1 +; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v3, v1 +; GFX900-SDAG-NEXT: v_exp_f32_e32 v1, v1 +; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0 ; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x42b17218 -; GFX900-SDAG-NEXT: v_ldexp_f32 v1, v2, v1 +; GFX900-SDAG-NEXT: v_ldexp_f32 v1, v1, v2 ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc ; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000 ; GFX900-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0 @@ -4062,14 +4064,14 @@ ; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0 ; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f -; GFX900-GISEL-NEXT: v_rndne_f32_e32 v1, v1 ; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2 -; GFX900-GISEL-NEXT: v_mad_f32 v3, v0, s4, -v1 -; GFX900-GISEL-NEXT: v_add_f32_e32 v2, v3, v2 -; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v1, v1 -; GFX900-GISEL-NEXT: v_exp_f32_e32 v2, v2 +; GFX900-GISEL-NEXT: v_rndne_f32_e32 v3, v1 +; GFX900-GISEL-NEXT: v_sub_f32_e32 v1, v1, v3 +; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 +; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3 +; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 -; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v2, v1 +; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 ; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218 @@ -4081,20 +4083,20 @@ ; SI-SDAG-LABEL: v_exp_f32_afn_dynamic: ; SI-SDAG: ; %bb.0: ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b ; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0 -; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1 -; SI-SDAG-NEXT: s_mov_b32 s5, 0x32a5705f -; SI-SDAG-NEXT: v_rndne_f32_e32 v1, v1 -; SI-SDAG-NEXT: v_fma_f32 v2, v0, s5, v2 -; SI-SDAG-NEXT: v_mad_f32 v3, v0, s4, -v1 -; SI-SDAG-NEXT: v_add_f32_e32 v2, v3, v2 -; SI-SDAG-NEXT: v_exp_f32_e32 v2, v2 -; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v1, v1 +; SI-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b +; SI-SDAG-NEXT: v_rndne_f32_e32 v2, v1 +; SI-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2 +; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1 +; SI-SDAG-NEXT: s_mov_b32 s4, 0x32a5705f +; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1 +; SI-SDAG-NEXT: v_add_f32_e32 v1, v3, v1 +; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1 +; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2 ; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0 ; SI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x42b17218 -; SI-SDAG-NEXT: v_ldexp_f32_e32 v1, v2, v1 +; SI-SDAG-NEXT: v_ldexp_f32_e32 v1, v1, v2 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc ; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000 ; SI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0 @@ -4108,14 +4110,14 @@ ; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0 ; SI-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1 ; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f -; SI-GISEL-NEXT: v_rndne_f32_e32 v1, v1 ; SI-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2 -; SI-GISEL-NEXT: v_mad_f32 v3, v0, s4, -v1 -; SI-GISEL-NEXT: v_add_f32_e32 v2, v3, v2 -; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v1, v1 -; SI-GISEL-NEXT: v_exp_f32_e32 v2, v2 +; SI-GISEL-NEXT: v_rndne_f32_e32 v3, v1 +; SI-GISEL-NEXT: v_sub_f32_e32 v1, v1, v3 +; SI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 +; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3 +; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1 ; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 -; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v2, v1 +; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 ; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218 @@ -4306,14 +4308,15 @@ ; VI-SDAG-NEXT: v_sub_f32_e32 v4, v0, v1 ; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v1 ; VI-SDAG-NEXT: v_mul_f32_e32 v5, 0x39a3b295, v4 -; VI-SDAG-NEXT: s_mov_b32 s4, 0x3fb8a000 -; VI-SDAG-NEXT: v_rndne_f32_e32 v2, v2 -; VI-SDAG-NEXT: v_mac_f32_e32 v5, 0x3fb8a000, v4 -; VI-SDAG-NEXT: v_mad_f32 v3, v1, s4, -v2 -; VI-SDAG-NEXT: v_mac_f32_e32 v5, 0x39a3b295, v1 -; VI-SDAG-NEXT: v_add_f32_e32 v1, v3, v5 +; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3fb8a000, v4 +; VI-SDAG-NEXT: v_rndne_f32_e32 v3, v2 +; VI-SDAG-NEXT: v_add_f32_e32 v4, v4, v5 +; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1 +; VI-SDAG-NEXT: v_sub_f32_e32 v2, v2, v3 +; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v4 +; VI-SDAG-NEXT: v_add_f32_e32 v1, v2, v1 ; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1 -; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2 +; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v3 ; VI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0 ; VI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 ; VI-SDAG-NEXT: s_mov_b32 s4, 0x42b17218 @@ -4329,14 +4332,15 @@ ; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 ; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1 -; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3fb8a000, v1 ; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x39a3b295, v2 -; VI-GISEL-NEXT: s_mov_b32 s4, 0x3fb8a000 -; VI-GISEL-NEXT: v_mac_f32_e32 v4, 0x3fb8a000, v2 +; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v2 +; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3fb8a000, v1 +; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v4 +; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1 +; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 ; VI-GISEL-NEXT: v_rndne_f32_e32 v2, v3 -; VI-GISEL-NEXT: v_mac_f32_e32 v4, 0x39a3b295, v1 -; VI-GISEL-NEXT: v_mad_f32 v1, v1, s4, -v2 -; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v4 +; VI-GISEL-NEXT: v_sub_f32_e32 v3, v3, v2 +; VI-GISEL-NEXT: v_add_f32_e32 v1, v3, v1 ; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v2 ; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1 ; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 @@ -4352,20 +4356,20 @@ ; GFX900-SDAG-LABEL: v_exp_f32_daz: ; GFX900-SDAG: ; %bb.0: ; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b ; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0 -; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1 -; GFX900-SDAG-NEXT: s_mov_b32 s5, 0x32a5705f -; GFX900-SDAG-NEXT: v_rndne_f32_e32 v1, v1 -; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s5, v2 -; GFX900-SDAG-NEXT: v_mad_f32 v3, v0, s4, -v1 -; GFX900-SDAG-NEXT: v_add_f32_e32 v2, v3, v2 -; GFX900-SDAG-NEXT: v_exp_f32_e32 v2, v2 -; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v1, v1 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b +; GFX900-SDAG-NEXT: v_rndne_f32_e32 v2, v1 +; GFX900-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2 +; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x32a5705f +; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1 +; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v3, v1 +; GFX900-SDAG-NEXT: v_exp_f32_e32 v1, v1 +; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0 ; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x42b17218 -; GFX900-SDAG-NEXT: v_ldexp_f32 v1, v2, v1 +; GFX900-SDAG-NEXT: v_ldexp_f32 v1, v1, v2 ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc ; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000 ; GFX900-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0 @@ -4379,14 +4383,14 @@ ; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0 ; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f -; GFX900-GISEL-NEXT: v_rndne_f32_e32 v1, v1 ; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2 -; GFX900-GISEL-NEXT: v_mad_f32 v3, v0, s4, -v1 -; GFX900-GISEL-NEXT: v_add_f32_e32 v2, v3, v2 -; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v1, v1 -; GFX900-GISEL-NEXT: v_exp_f32_e32 v2, v2 +; GFX900-GISEL-NEXT: v_rndne_f32_e32 v3, v1 +; GFX900-GISEL-NEXT: v_sub_f32_e32 v1, v1, v3 +; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 +; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3 +; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 -; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v2, v1 +; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 ; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218 @@ -4398,20 +4402,20 @@ ; SI-SDAG-LABEL: v_exp_f32_daz: ; SI-SDAG: ; %bb.0: ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b ; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0 -; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1 -; SI-SDAG-NEXT: s_mov_b32 s5, 0x32a5705f -; SI-SDAG-NEXT: v_rndne_f32_e32 v1, v1 -; SI-SDAG-NEXT: v_fma_f32 v2, v0, s5, v2 -; SI-SDAG-NEXT: v_mad_f32 v3, v0, s4, -v1 -; SI-SDAG-NEXT: v_add_f32_e32 v2, v3, v2 -; SI-SDAG-NEXT: v_exp_f32_e32 v2, v2 -; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v1, v1 +; SI-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b +; SI-SDAG-NEXT: v_rndne_f32_e32 v2, v1 +; SI-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2 +; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1 +; SI-SDAG-NEXT: s_mov_b32 s4, 0x32a5705f +; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1 +; SI-SDAG-NEXT: v_add_f32_e32 v1, v3, v1 +; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1 +; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2 ; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0 ; SI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x42b17218 -; SI-SDAG-NEXT: v_ldexp_f32_e32 v1, v2, v1 +; SI-SDAG-NEXT: v_ldexp_f32_e32 v1, v1, v2 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc ; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000 ; SI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0 @@ -4425,14 +4429,14 @@ ; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0 ; SI-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1 ; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f -; SI-GISEL-NEXT: v_rndne_f32_e32 v1, v1 ; SI-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2 -; SI-GISEL-NEXT: v_mad_f32 v3, v0, s4, -v1 -; SI-GISEL-NEXT: v_add_f32_e32 v2, v3, v2 -; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v1, v1 -; SI-GISEL-NEXT: v_exp_f32_e32 v2, v2 +; SI-GISEL-NEXT: v_rndne_f32_e32 v3, v1 +; SI-GISEL-NEXT: v_sub_f32_e32 v1, v1, v3 +; SI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 +; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3 +; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1 ; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 -; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v2, v1 +; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 ; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218 @@ -4620,14 +4624,15 @@ ; VI-SDAG-NEXT: v_sub_f32_e32 v4, v0, v1 ; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v1 ; VI-SDAG-NEXT: v_mul_f32_e32 v5, 0x39a3b295, v4 -; VI-SDAG-NEXT: s_mov_b32 s4, 0x3fb8a000 -; VI-SDAG-NEXT: v_rndne_f32_e32 v2, v2 -; VI-SDAG-NEXT: v_mac_f32_e32 v5, 0x3fb8a000, v4 -; VI-SDAG-NEXT: v_mad_f32 v3, v1, s4, -v2 -; VI-SDAG-NEXT: v_mac_f32_e32 v5, 0x39a3b295, v1 -; VI-SDAG-NEXT: v_add_f32_e32 v1, v3, v5 +; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3fb8a000, v4 +; VI-SDAG-NEXT: v_rndne_f32_e32 v3, v2 +; VI-SDAG-NEXT: v_add_f32_e32 v4, v4, v5 +; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1 +; VI-SDAG-NEXT: v_sub_f32_e32 v2, v2, v3 +; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v4 +; VI-SDAG-NEXT: v_add_f32_e32 v1, v2, v1 ; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1 -; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2 +; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v3 ; VI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0 ; VI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 ; VI-SDAG-NEXT: s_mov_b32 s4, 0x42b17218 @@ -4643,14 +4648,15 @@ ; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 ; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1 -; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3fb8a000, v1 ; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x39a3b295, v2 -; VI-GISEL-NEXT: s_mov_b32 s4, 0x3fb8a000 -; VI-GISEL-NEXT: v_mac_f32_e32 v4, 0x3fb8a000, v2 +; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v2 +; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3fb8a000, v1 +; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v4 +; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1 +; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 ; VI-GISEL-NEXT: v_rndne_f32_e32 v2, v3 -; VI-GISEL-NEXT: v_mac_f32_e32 v4, 0x39a3b295, v1 -; VI-GISEL-NEXT: v_mad_f32 v1, v1, s4, -v2 -; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v4 +; VI-GISEL-NEXT: v_sub_f32_e32 v3, v3, v2 +; VI-GISEL-NEXT: v_add_f32_e32 v1, v3, v1 ; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v2 ; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1 ; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 @@ -4666,20 +4672,20 @@ ; GFX900-SDAG-LABEL: v_exp_f32_nnan_daz: ; GFX900-SDAG: ; %bb.0: ; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b ; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0 -; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1 -; GFX900-SDAG-NEXT: s_mov_b32 s5, 0x32a5705f -; GFX900-SDAG-NEXT: v_rndne_f32_e32 v1, v1 -; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s5, v2 -; GFX900-SDAG-NEXT: v_mad_f32 v3, v0, s4, -v1 -; GFX900-SDAG-NEXT: v_add_f32_e32 v2, v3, v2 -; GFX900-SDAG-NEXT: v_exp_f32_e32 v2, v2 -; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v1, v1 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b +; GFX900-SDAG-NEXT: v_rndne_f32_e32 v2, v1 +; GFX900-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2 +; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x32a5705f +; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1 +; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v3, v1 +; GFX900-SDAG-NEXT: v_exp_f32_e32 v1, v1 +; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0 ; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x42b17218 -; GFX900-SDAG-NEXT: v_ldexp_f32 v1, v2, v1 +; GFX900-SDAG-NEXT: v_ldexp_f32 v1, v1, v2 ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc ; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000 ; GFX900-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0 @@ -4693,14 +4699,14 @@ ; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0 ; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f -; GFX900-GISEL-NEXT: v_rndne_f32_e32 v1, v1 ; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2 -; GFX900-GISEL-NEXT: v_mad_f32 v3, v0, s4, -v1 -; GFX900-GISEL-NEXT: v_add_f32_e32 v2, v3, v2 -; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v1, v1 -; GFX900-GISEL-NEXT: v_exp_f32_e32 v2, v2 +; GFX900-GISEL-NEXT: v_rndne_f32_e32 v3, v1 +; GFX900-GISEL-NEXT: v_sub_f32_e32 v1, v1, v3 +; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 +; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3 +; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 -; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v2, v1 +; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 ; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218 @@ -4712,20 +4718,20 @@ ; SI-SDAG-LABEL: v_exp_f32_nnan_daz: ; SI-SDAG: ; %bb.0: ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b ; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0 -; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1 -; SI-SDAG-NEXT: s_mov_b32 s5, 0x32a5705f -; SI-SDAG-NEXT: v_rndne_f32_e32 v1, v1 -; SI-SDAG-NEXT: v_fma_f32 v2, v0, s5, v2 -; SI-SDAG-NEXT: v_mad_f32 v3, v0, s4, -v1 -; SI-SDAG-NEXT: v_add_f32_e32 v2, v3, v2 -; SI-SDAG-NEXT: v_exp_f32_e32 v2, v2 -; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v1, v1 +; SI-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b +; SI-SDAG-NEXT: v_rndne_f32_e32 v2, v1 +; SI-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2 +; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1 +; SI-SDAG-NEXT: s_mov_b32 s4, 0x32a5705f +; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1 +; SI-SDAG-NEXT: v_add_f32_e32 v1, v3, v1 +; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1 +; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2 ; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0 ; SI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x42b17218 -; SI-SDAG-NEXT: v_ldexp_f32_e32 v1, v2, v1 +; SI-SDAG-NEXT: v_ldexp_f32_e32 v1, v1, v2 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc ; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000 ; SI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0 @@ -4739,14 +4745,14 @@ ; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0 ; SI-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1 ; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f -; SI-GISEL-NEXT: v_rndne_f32_e32 v1, v1 ; SI-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2 -; SI-GISEL-NEXT: v_mad_f32 v3, v0, s4, -v1 -; SI-GISEL-NEXT: v_add_f32_e32 v2, v3, v2 -; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v1, v1 -; SI-GISEL-NEXT: v_exp_f32_e32 v2, v2 +; SI-GISEL-NEXT: v_rndne_f32_e32 v3, v1 +; SI-GISEL-NEXT: v_sub_f32_e32 v1, v1, v3 +; SI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 +; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3 +; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1 ; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 -; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v2, v1 +; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 ; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218 @@ -4776,14 +4782,15 @@ ; VI-SDAG-NEXT: v_sub_f32_e32 v4, v0, v1 ; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v1 ; VI-SDAG-NEXT: v_mul_f32_e32 v5, 0x39a3b295, v4 -; VI-SDAG-NEXT: s_mov_b32 s4, 0x3fb8a000 -; VI-SDAG-NEXT: v_rndne_f32_e32 v2, v2 -; VI-SDAG-NEXT: v_mac_f32_e32 v5, 0x3fb8a000, v4 -; VI-SDAG-NEXT: v_mad_f32 v3, v1, s4, -v2 -; VI-SDAG-NEXT: v_mac_f32_e32 v5, 0x39a3b295, v1 -; VI-SDAG-NEXT: v_add_f32_e32 v1, v3, v5 +; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3fb8a000, v4 +; VI-SDAG-NEXT: v_rndne_f32_e32 v3, v2 +; VI-SDAG-NEXT: v_add_f32_e32 v4, v4, v5 +; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1 +; VI-SDAG-NEXT: v_sub_f32_e32 v2, v2, v3 +; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v4 +; VI-SDAG-NEXT: v_add_f32_e32 v1, v2, v1 ; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1 -; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2 +; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v3 ; VI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0 ; VI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 ; VI-SDAG-NEXT: s_mov_b32 s4, 0x42b17218 @@ -4799,14 +4806,15 @@ ; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 ; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1 -; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3fb8a000, v1 ; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x39a3b295, v2 -; VI-GISEL-NEXT: s_mov_b32 s4, 0x3fb8a000 -; VI-GISEL-NEXT: v_mac_f32_e32 v4, 0x3fb8a000, v2 +; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v2 +; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3fb8a000, v1 +; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v4 +; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1 +; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 ; VI-GISEL-NEXT: v_rndne_f32_e32 v2, v3 -; VI-GISEL-NEXT: v_mac_f32_e32 v4, 0x39a3b295, v1 -; VI-GISEL-NEXT: v_mad_f32 v1, v1, s4, -v2 -; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v4 +; VI-GISEL-NEXT: v_sub_f32_e32 v3, v3, v2 +; VI-GISEL-NEXT: v_add_f32_e32 v1, v3, v1 ; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v2 ; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1 ; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 @@ -4822,20 +4830,20 @@ ; GFX900-SDAG-LABEL: v_exp_f32_nnan_dynamic: ; GFX900-SDAG: ; %bb.0: ; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b ; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0 -; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1 -; GFX900-SDAG-NEXT: s_mov_b32 s5, 0x32a5705f -; GFX900-SDAG-NEXT: v_rndne_f32_e32 v1, v1 -; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s5, v2 -; GFX900-SDAG-NEXT: v_mad_f32 v3, v0, s4, -v1 -; GFX900-SDAG-NEXT: v_add_f32_e32 v2, v3, v2 -; GFX900-SDAG-NEXT: v_exp_f32_e32 v2, v2 -; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v1, v1 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b +; GFX900-SDAG-NEXT: v_rndne_f32_e32 v2, v1 +; GFX900-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2 +; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x32a5705f +; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1 +; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v3, v1 +; GFX900-SDAG-NEXT: v_exp_f32_e32 v1, v1 +; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0 ; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x42b17218 -; GFX900-SDAG-NEXT: v_ldexp_f32 v1, v2, v1 +; GFX900-SDAG-NEXT: v_ldexp_f32 v1, v1, v2 ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc ; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000 ; GFX900-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0 @@ -4849,14 +4857,14 @@ ; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0 ; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f -; GFX900-GISEL-NEXT: v_rndne_f32_e32 v1, v1 ; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2 -; GFX900-GISEL-NEXT: v_mad_f32 v3, v0, s4, -v1 -; GFX900-GISEL-NEXT: v_add_f32_e32 v2, v3, v2 -; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v1, v1 -; GFX900-GISEL-NEXT: v_exp_f32_e32 v2, v2 +; GFX900-GISEL-NEXT: v_rndne_f32_e32 v3, v1 +; GFX900-GISEL-NEXT: v_sub_f32_e32 v1, v1, v3 +; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 +; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3 +; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 -; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v2, v1 +; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 ; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218 @@ -4868,20 +4876,20 @@ ; SI-SDAG-LABEL: v_exp_f32_nnan_dynamic: ; SI-SDAG: ; %bb.0: ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b ; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0 -; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1 -; SI-SDAG-NEXT: s_mov_b32 s5, 0x32a5705f -; SI-SDAG-NEXT: v_rndne_f32_e32 v1, v1 -; SI-SDAG-NEXT: v_fma_f32 v2, v0, s5, v2 -; SI-SDAG-NEXT: v_mad_f32 v3, v0, s4, -v1 -; SI-SDAG-NEXT: v_add_f32_e32 v2, v3, v2 -; SI-SDAG-NEXT: v_exp_f32_e32 v2, v2 -; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v1, v1 +; SI-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b +; SI-SDAG-NEXT: v_rndne_f32_e32 v2, v1 +; SI-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2 +; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1 +; SI-SDAG-NEXT: s_mov_b32 s4, 0x32a5705f +; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1 +; SI-SDAG-NEXT: v_add_f32_e32 v1, v3, v1 +; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1 +; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2 ; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0 ; SI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x42b17218 -; SI-SDAG-NEXT: v_ldexp_f32_e32 v1, v2, v1 +; SI-SDAG-NEXT: v_ldexp_f32_e32 v1, v1, v2 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc ; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000 ; SI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0 @@ -4895,14 +4903,14 @@ ; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0 ; SI-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1 ; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f -; SI-GISEL-NEXT: v_rndne_f32_e32 v1, v1 ; SI-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2 -; SI-GISEL-NEXT: v_mad_f32 v3, v0, s4, -v1 -; SI-GISEL-NEXT: v_add_f32_e32 v2, v3, v2 -; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v1, v1 -; SI-GISEL-NEXT: v_exp_f32_e32 v2, v2 +; SI-GISEL-NEXT: v_rndne_f32_e32 v3, v1 +; SI-GISEL-NEXT: v_sub_f32_e32 v1, v1, v3 +; SI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 +; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3 +; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1 ; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 -; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v2, v1 +; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 ; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218 @@ -4932,14 +4940,15 @@ ; VI-SDAG-NEXT: v_sub_f32_e32 v4, v0, v1 ; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v1 ; VI-SDAG-NEXT: v_mul_f32_e32 v5, 0x39a3b295, v4 -; VI-SDAG-NEXT: s_mov_b32 s4, 0x3fb8a000 -; VI-SDAG-NEXT: v_rndne_f32_e32 v2, v2 -; VI-SDAG-NEXT: v_mac_f32_e32 v5, 0x3fb8a000, v4 -; VI-SDAG-NEXT: v_mad_f32 v3, v1, s4, -v2 -; VI-SDAG-NEXT: v_mac_f32_e32 v5, 0x39a3b295, v1 -; VI-SDAG-NEXT: v_add_f32_e32 v1, v3, v5 +; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3fb8a000, v4 +; VI-SDAG-NEXT: v_rndne_f32_e32 v3, v2 +; VI-SDAG-NEXT: v_add_f32_e32 v4, v4, v5 +; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1 +; VI-SDAG-NEXT: v_sub_f32_e32 v2, v2, v3 +; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v4 +; VI-SDAG-NEXT: v_add_f32_e32 v1, v2, v1 ; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1 -; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2 +; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v3 ; VI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0 ; VI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 ; VI-SDAG-NEXT: v_ldexp_f32 v1, v1, v2 @@ -4951,14 +4960,15 @@ ; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 ; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1 -; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3fb8a000, v1 ; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x39a3b295, v2 -; VI-GISEL-NEXT: s_mov_b32 s4, 0x3fb8a000 -; VI-GISEL-NEXT: v_mac_f32_e32 v4, 0x3fb8a000, v2 +; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v2 +; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3fb8a000, v1 +; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v4 +; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1 +; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 ; VI-GISEL-NEXT: v_rndne_f32_e32 v2, v3 -; VI-GISEL-NEXT: v_mac_f32_e32 v4, 0x39a3b295, v1 -; VI-GISEL-NEXT: v_mad_f32 v1, v1, s4, -v2 -; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v4 +; VI-GISEL-NEXT: v_sub_f32_e32 v3, v3, v2 +; VI-GISEL-NEXT: v_add_f32_e32 v1, v3, v1 ; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v2 ; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1 ; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 @@ -4970,19 +4980,19 @@ ; GFX900-SDAG-LABEL: v_exp_f32_ninf_daz: ; GFX900-SDAG: ; %bb.0: ; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b ; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0 -; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1 -; GFX900-SDAG-NEXT: s_mov_b32 s5, 0x32a5705f -; GFX900-SDAG-NEXT: v_rndne_f32_e32 v1, v1 -; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s5, v2 -; GFX900-SDAG-NEXT: v_mad_f32 v3, v0, s4, -v1 -; GFX900-SDAG-NEXT: v_add_f32_e32 v2, v3, v2 -; GFX900-SDAG-NEXT: v_exp_f32_e32 v2, v2 -; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v1, v1 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b +; GFX900-SDAG-NEXT: v_rndne_f32_e32 v2, v1 +; GFX900-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2 +; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x32a5705f +; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1 +; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v3, v1 +; GFX900-SDAG-NEXT: v_exp_f32_e32 v1, v1 +; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0 ; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 -; GFX900-SDAG-NEXT: v_ldexp_f32 v1, v2, v1 +; GFX900-SDAG-NEXT: v_ldexp_f32 v1, v1, v2 ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc ; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] ; @@ -4993,13 +5003,13 @@ ; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0 ; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f -; GFX900-GISEL-NEXT: v_rndne_f32_e32 v1, v1 ; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2 -; GFX900-GISEL-NEXT: v_mad_f32 v3, v0, s4, -v1 -; GFX900-GISEL-NEXT: v_add_f32_e32 v2, v3, v2 -; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v1, v1 -; GFX900-GISEL-NEXT: v_exp_f32_e32 v2, v2 -; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v2, v1 +; GFX900-GISEL-NEXT: v_rndne_f32_e32 v3, v1 +; GFX900-GISEL-NEXT: v_sub_f32_e32 v1, v1, v3 +; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 +; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3 +; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1 +; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 ; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 ; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc @@ -5008,19 +5018,19 @@ ; SI-SDAG-LABEL: v_exp_f32_ninf_daz: ; SI-SDAG: ; %bb.0: ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b ; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0 -; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1 -; SI-SDAG-NEXT: s_mov_b32 s5, 0x32a5705f -; SI-SDAG-NEXT: v_rndne_f32_e32 v1, v1 -; SI-SDAG-NEXT: v_fma_f32 v2, v0, s5, v2 -; SI-SDAG-NEXT: v_mad_f32 v3, v0, s4, -v1 -; SI-SDAG-NEXT: v_add_f32_e32 v2, v3, v2 -; SI-SDAG-NEXT: v_exp_f32_e32 v2, v2 -; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v1, v1 +; SI-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b +; SI-SDAG-NEXT: v_rndne_f32_e32 v2, v1 +; SI-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2 +; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1 +; SI-SDAG-NEXT: s_mov_b32 s4, 0x32a5705f +; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1 +; SI-SDAG-NEXT: v_add_f32_e32 v1, v3, v1 +; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1 +; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2 ; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0 ; SI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 -; SI-SDAG-NEXT: v_ldexp_f32_e32 v1, v2, v1 +; SI-SDAG-NEXT: v_ldexp_f32_e32 v1, v1, v2 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc ; SI-SDAG-NEXT: s_setpc_b64 s[30:31] ; @@ -5031,13 +5041,13 @@ ; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0 ; SI-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1 ; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f -; SI-GISEL-NEXT: v_rndne_f32_e32 v1, v1 ; SI-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2 -; SI-GISEL-NEXT: v_mad_f32 v3, v0, s4, -v1 -; SI-GISEL-NEXT: v_add_f32_e32 v2, v3, v2 -; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v1, v1 -; SI-GISEL-NEXT: v_exp_f32_e32 v2, v2 -; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v2, v1 +; SI-GISEL-NEXT: v_rndne_f32_e32 v3, v1 +; SI-GISEL-NEXT: v_sub_f32_e32 v1, v1, v3 +; SI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 +; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3 +; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1 +; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 ; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 ; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc @@ -5064,14 +5074,15 @@ ; VI-SDAG-NEXT: v_sub_f32_e32 v4, v0, v1 ; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v1 ; VI-SDAG-NEXT: v_mul_f32_e32 v5, 0x39a3b295, v4 -; VI-SDAG-NEXT: s_mov_b32 s4, 0x3fb8a000 -; VI-SDAG-NEXT: v_rndne_f32_e32 v2, v2 -; VI-SDAG-NEXT: v_mac_f32_e32 v5, 0x3fb8a000, v4 -; VI-SDAG-NEXT: v_mad_f32 v3, v1, s4, -v2 -; VI-SDAG-NEXT: v_mac_f32_e32 v5, 0x39a3b295, v1 -; VI-SDAG-NEXT: v_add_f32_e32 v1, v3, v5 +; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3fb8a000, v4 +; VI-SDAG-NEXT: v_rndne_f32_e32 v3, v2 +; VI-SDAG-NEXT: v_add_f32_e32 v4, v4, v5 +; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1 +; VI-SDAG-NEXT: v_sub_f32_e32 v2, v2, v3 +; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v4 +; VI-SDAG-NEXT: v_add_f32_e32 v1, v2, v1 ; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1 -; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2 +; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v3 ; VI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0 ; VI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 ; VI-SDAG-NEXT: v_ldexp_f32 v1, v1, v2 @@ -5083,14 +5094,15 @@ ; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 ; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1 -; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3fb8a000, v1 ; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x39a3b295, v2 -; VI-GISEL-NEXT: s_mov_b32 s4, 0x3fb8a000 -; VI-GISEL-NEXT: v_mac_f32_e32 v4, 0x3fb8a000, v2 +; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v2 +; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3fb8a000, v1 +; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v4 +; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1 +; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 ; VI-GISEL-NEXT: v_rndne_f32_e32 v2, v3 -; VI-GISEL-NEXT: v_mac_f32_e32 v4, 0x39a3b295, v1 -; VI-GISEL-NEXT: v_mad_f32 v1, v1, s4, -v2 -; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v4 +; VI-GISEL-NEXT: v_sub_f32_e32 v3, v3, v2 +; VI-GISEL-NEXT: v_add_f32_e32 v1, v3, v1 ; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v2 ; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1 ; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 @@ -5102,19 +5114,19 @@ ; GFX900-SDAG-LABEL: v_exp_f32_ninf_dynamic: ; GFX900-SDAG: ; %bb.0: ; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b ; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0 -; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1 -; GFX900-SDAG-NEXT: s_mov_b32 s5, 0x32a5705f -; GFX900-SDAG-NEXT: v_rndne_f32_e32 v1, v1 -; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s5, v2 -; GFX900-SDAG-NEXT: v_mad_f32 v3, v0, s4, -v1 -; GFX900-SDAG-NEXT: v_add_f32_e32 v2, v3, v2 -; GFX900-SDAG-NEXT: v_exp_f32_e32 v2, v2 -; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v1, v1 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b +; GFX900-SDAG-NEXT: v_rndne_f32_e32 v2, v1 +; GFX900-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2 +; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x32a5705f +; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1 +; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v3, v1 +; GFX900-SDAG-NEXT: v_exp_f32_e32 v1, v1 +; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0 ; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 -; GFX900-SDAG-NEXT: v_ldexp_f32 v1, v2, v1 +; GFX900-SDAG-NEXT: v_ldexp_f32 v1, v1, v2 ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc ; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] ; @@ -5125,13 +5137,13 @@ ; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0 ; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f -; GFX900-GISEL-NEXT: v_rndne_f32_e32 v1, v1 ; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2 -; GFX900-GISEL-NEXT: v_mad_f32 v3, v0, s4, -v1 -; GFX900-GISEL-NEXT: v_add_f32_e32 v2, v3, v2 -; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v1, v1 -; GFX900-GISEL-NEXT: v_exp_f32_e32 v2, v2 -; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v2, v1 +; GFX900-GISEL-NEXT: v_rndne_f32_e32 v3, v1 +; GFX900-GISEL-NEXT: v_sub_f32_e32 v1, v1, v3 +; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 +; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3 +; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1 +; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 ; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 ; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc @@ -5140,19 +5152,19 @@ ; SI-SDAG-LABEL: v_exp_f32_ninf_dynamic: ; SI-SDAG: ; %bb.0: ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b ; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0 -; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1 -; SI-SDAG-NEXT: s_mov_b32 s5, 0x32a5705f -; SI-SDAG-NEXT: v_rndne_f32_e32 v1, v1 -; SI-SDAG-NEXT: v_fma_f32 v2, v0, s5, v2 -; SI-SDAG-NEXT: v_mad_f32 v3, v0, s4, -v1 -; SI-SDAG-NEXT: v_add_f32_e32 v2, v3, v2 -; SI-SDAG-NEXT: v_exp_f32_e32 v2, v2 -; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v1, v1 +; SI-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b +; SI-SDAG-NEXT: v_rndne_f32_e32 v2, v1 +; SI-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2 +; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1 +; SI-SDAG-NEXT: s_mov_b32 s4, 0x32a5705f +; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1 +; SI-SDAG-NEXT: v_add_f32_e32 v1, v3, v1 +; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1 +; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2 ; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0 ; SI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 -; SI-SDAG-NEXT: v_ldexp_f32_e32 v1, v2, v1 +; SI-SDAG-NEXT: v_ldexp_f32_e32 v1, v1, v2 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc ; SI-SDAG-NEXT: s_setpc_b64 s[30:31] ; @@ -5163,13 +5175,13 @@ ; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0 ; SI-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1 ; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f -; SI-GISEL-NEXT: v_rndne_f32_e32 v1, v1 ; SI-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2 -; SI-GISEL-NEXT: v_mad_f32 v3, v0, s4, -v1 -; SI-GISEL-NEXT: v_add_f32_e32 v2, v3, v2 -; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v1, v1 -; SI-GISEL-NEXT: v_exp_f32_e32 v2, v2 -; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v2, v1 +; SI-GISEL-NEXT: v_rndne_f32_e32 v3, v1 +; SI-GISEL-NEXT: v_sub_f32_e32 v1, v1, v3 +; SI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 +; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3 +; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1 +; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 ; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 ; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc @@ -5330,14 +5342,15 @@ ; VI-SDAG-NEXT: v_sub_f32_e32 v4, v0, v1 ; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v1 ; VI-SDAG-NEXT: v_mul_f32_e32 v5, 0x39a3b295, v4 -; VI-SDAG-NEXT: s_mov_b32 s4, 0x3fb8a000 -; VI-SDAG-NEXT: v_rndne_f32_e32 v2, v2 -; VI-SDAG-NEXT: v_mac_f32_e32 v5, 0x3fb8a000, v4 -; VI-SDAG-NEXT: v_mad_f32 v3, v1, s4, -v2 -; VI-SDAG-NEXT: v_mac_f32_e32 v5, 0x39a3b295, v1 -; VI-SDAG-NEXT: v_add_f32_e32 v1, v3, v5 +; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3fb8a000, v4 +; VI-SDAG-NEXT: v_rndne_f32_e32 v3, v2 +; VI-SDAG-NEXT: v_add_f32_e32 v4, v4, v5 +; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1 +; VI-SDAG-NEXT: v_sub_f32_e32 v2, v2, v3 +; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v4 +; VI-SDAG-NEXT: v_add_f32_e32 v1, v2, v1 ; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1 -; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2 +; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v3 ; VI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0 ; VI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 ; VI-SDAG-NEXT: v_ldexp_f32 v1, v1, v2 @@ -5349,14 +5362,15 @@ ; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 ; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1 -; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3fb8a000, v1 ; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x39a3b295, v2 -; VI-GISEL-NEXT: s_mov_b32 s4, 0x3fb8a000 -; VI-GISEL-NEXT: v_mac_f32_e32 v4, 0x3fb8a000, v2 +; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v2 +; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3fb8a000, v1 +; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v4 +; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1 +; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 ; VI-GISEL-NEXT: v_rndne_f32_e32 v2, v3 -; VI-GISEL-NEXT: v_mac_f32_e32 v4, 0x39a3b295, v1 -; VI-GISEL-NEXT: v_mad_f32 v1, v1, s4, -v2 -; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v4 +; VI-GISEL-NEXT: v_sub_f32_e32 v3, v3, v2 +; VI-GISEL-NEXT: v_add_f32_e32 v1, v3, v1 ; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v2 ; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1 ; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 @@ -5368,19 +5382,19 @@ ; GFX900-SDAG-LABEL: v_exp_f32_nnan_ninf_daz: ; GFX900-SDAG: ; %bb.0: ; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b ; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0 -; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1 -; GFX900-SDAG-NEXT: s_mov_b32 s5, 0x32a5705f -; GFX900-SDAG-NEXT: v_rndne_f32_e32 v1, v1 -; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s5, v2 -; GFX900-SDAG-NEXT: v_mad_f32 v3, v0, s4, -v1 -; GFX900-SDAG-NEXT: v_add_f32_e32 v2, v3, v2 -; GFX900-SDAG-NEXT: v_exp_f32_e32 v2, v2 -; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v1, v1 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b +; GFX900-SDAG-NEXT: v_rndne_f32_e32 v2, v1 +; GFX900-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2 +; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x32a5705f +; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1 +; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v3, v1 +; GFX900-SDAG-NEXT: v_exp_f32_e32 v1, v1 +; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0 ; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 -; GFX900-SDAG-NEXT: v_ldexp_f32 v1, v2, v1 +; GFX900-SDAG-NEXT: v_ldexp_f32 v1, v1, v2 ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc ; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] ; @@ -5391,13 +5405,13 @@ ; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0 ; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f -; GFX900-GISEL-NEXT: v_rndne_f32_e32 v1, v1 ; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2 -; GFX900-GISEL-NEXT: v_mad_f32 v3, v0, s4, -v1 -; GFX900-GISEL-NEXT: v_add_f32_e32 v2, v3, v2 -; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v1, v1 -; GFX900-GISEL-NEXT: v_exp_f32_e32 v2, v2 -; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v2, v1 +; GFX900-GISEL-NEXT: v_rndne_f32_e32 v3, v1 +; GFX900-GISEL-NEXT: v_sub_f32_e32 v1, v1, v3 +; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 +; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3 +; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1 +; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 ; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 ; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc @@ -5406,19 +5420,19 @@ ; SI-SDAG-LABEL: v_exp_f32_nnan_ninf_daz: ; SI-SDAG: ; %bb.0: ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b ; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0 -; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1 -; SI-SDAG-NEXT: s_mov_b32 s5, 0x32a5705f -; SI-SDAG-NEXT: v_rndne_f32_e32 v1, v1 -; SI-SDAG-NEXT: v_fma_f32 v2, v0, s5, v2 -; SI-SDAG-NEXT: v_mad_f32 v3, v0, s4, -v1 -; SI-SDAG-NEXT: v_add_f32_e32 v2, v3, v2 -; SI-SDAG-NEXT: v_exp_f32_e32 v2, v2 -; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v1, v1 +; SI-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b +; SI-SDAG-NEXT: v_rndne_f32_e32 v2, v1 +; SI-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2 +; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1 +; SI-SDAG-NEXT: s_mov_b32 s4, 0x32a5705f +; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1 +; SI-SDAG-NEXT: v_add_f32_e32 v1, v3, v1 +; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1 +; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2 ; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0 ; SI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 -; SI-SDAG-NEXT: v_ldexp_f32_e32 v1, v2, v1 +; SI-SDAG-NEXT: v_ldexp_f32_e32 v1, v1, v2 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc ; SI-SDAG-NEXT: s_setpc_b64 s[30:31] ; @@ -5429,13 +5443,13 @@ ; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0 ; SI-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1 ; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f -; SI-GISEL-NEXT: v_rndne_f32_e32 v1, v1 ; SI-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2 -; SI-GISEL-NEXT: v_mad_f32 v3, v0, s4, -v1 -; SI-GISEL-NEXT: v_add_f32_e32 v2, v3, v2 -; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v1, v1 -; SI-GISEL-NEXT: v_exp_f32_e32 v2, v2 -; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v2, v1 +; SI-GISEL-NEXT: v_rndne_f32_e32 v3, v1 +; SI-GISEL-NEXT: v_sub_f32_e32 v1, v1, v3 +; SI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 +; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3 +; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1 +; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 ; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 ; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc @@ -5462,14 +5476,15 @@ ; VI-SDAG-NEXT: v_sub_f32_e32 v4, v0, v1 ; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v1 ; VI-SDAG-NEXT: v_mul_f32_e32 v5, 0x39a3b295, v4 -; VI-SDAG-NEXT: s_mov_b32 s4, 0x3fb8a000 -; VI-SDAG-NEXT: v_rndne_f32_e32 v2, v2 -; VI-SDAG-NEXT: v_mac_f32_e32 v5, 0x3fb8a000, v4 -; VI-SDAG-NEXT: v_mad_f32 v3, v1, s4, -v2 -; VI-SDAG-NEXT: v_mac_f32_e32 v5, 0x39a3b295, v1 -; VI-SDAG-NEXT: v_add_f32_e32 v1, v3, v5 +; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3fb8a000, v4 +; VI-SDAG-NEXT: v_rndne_f32_e32 v3, v2 +; VI-SDAG-NEXT: v_add_f32_e32 v4, v4, v5 +; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1 +; VI-SDAG-NEXT: v_sub_f32_e32 v2, v2, v3 +; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v4 +; VI-SDAG-NEXT: v_add_f32_e32 v1, v2, v1 ; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1 -; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2 +; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v3 ; VI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0 ; VI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 ; VI-SDAG-NEXT: v_ldexp_f32 v1, v1, v2 @@ -5481,14 +5496,15 @@ ; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 ; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1 -; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3fb8a000, v1 ; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x39a3b295, v2 -; VI-GISEL-NEXT: s_mov_b32 s4, 0x3fb8a000 -; VI-GISEL-NEXT: v_mac_f32_e32 v4, 0x3fb8a000, v2 +; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v2 +; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3fb8a000, v1 +; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v4 +; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1 +; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 ; VI-GISEL-NEXT: v_rndne_f32_e32 v2, v3 -; VI-GISEL-NEXT: v_mac_f32_e32 v4, 0x39a3b295, v1 -; VI-GISEL-NEXT: v_mad_f32 v1, v1, s4, -v2 -; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v4 +; VI-GISEL-NEXT: v_sub_f32_e32 v3, v3, v2 +; VI-GISEL-NEXT: v_add_f32_e32 v1, v3, v1 ; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v2 ; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1 ; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 @@ -5500,19 +5516,19 @@ ; GFX900-SDAG-LABEL: v_exp_f32_nnan_ninf_dynamic: ; GFX900-SDAG: ; %bb.0: ; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b ; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0 -; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1 -; GFX900-SDAG-NEXT: s_mov_b32 s5, 0x32a5705f -; GFX900-SDAG-NEXT: v_rndne_f32_e32 v1, v1 -; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s5, v2 -; GFX900-SDAG-NEXT: v_mad_f32 v3, v0, s4, -v1 -; GFX900-SDAG-NEXT: v_add_f32_e32 v2, v3, v2 -; GFX900-SDAG-NEXT: v_exp_f32_e32 v2, v2 -; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v1, v1 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b +; GFX900-SDAG-NEXT: v_rndne_f32_e32 v2, v1 +; GFX900-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2 +; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x32a5705f +; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1 +; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v3, v1 +; GFX900-SDAG-NEXT: v_exp_f32_e32 v1, v1 +; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0 ; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 -; GFX900-SDAG-NEXT: v_ldexp_f32 v1, v2, v1 +; GFX900-SDAG-NEXT: v_ldexp_f32 v1, v1, v2 ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc ; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] ; @@ -5523,13 +5539,13 @@ ; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0 ; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f -; GFX900-GISEL-NEXT: v_rndne_f32_e32 v1, v1 ; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2 -; GFX900-GISEL-NEXT: v_mad_f32 v3, v0, s4, -v1 -; GFX900-GISEL-NEXT: v_add_f32_e32 v2, v3, v2 -; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v1, v1 -; GFX900-GISEL-NEXT: v_exp_f32_e32 v2, v2 -; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v2, v1 +; GFX900-GISEL-NEXT: v_rndne_f32_e32 v3, v1 +; GFX900-GISEL-NEXT: v_sub_f32_e32 v1, v1, v3 +; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 +; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3 +; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1 +; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 ; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 ; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc @@ -5538,19 +5554,19 @@ ; SI-SDAG-LABEL: v_exp_f32_nnan_ninf_dynamic: ; SI-SDAG: ; %bb.0: ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b ; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0 -; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1 -; SI-SDAG-NEXT: s_mov_b32 s5, 0x32a5705f -; SI-SDAG-NEXT: v_rndne_f32_e32 v1, v1 -; SI-SDAG-NEXT: v_fma_f32 v2, v0, s5, v2 -; SI-SDAG-NEXT: v_mad_f32 v3, v0, s4, -v1 -; SI-SDAG-NEXT: v_add_f32_e32 v2, v3, v2 -; SI-SDAG-NEXT: v_exp_f32_e32 v2, v2 -; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v1, v1 +; SI-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b +; SI-SDAG-NEXT: v_rndne_f32_e32 v2, v1 +; SI-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2 +; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1 +; SI-SDAG-NEXT: s_mov_b32 s4, 0x32a5705f +; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1 +; SI-SDAG-NEXT: v_add_f32_e32 v1, v3, v1 +; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1 +; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2 ; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0 ; SI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 -; SI-SDAG-NEXT: v_ldexp_f32_e32 v1, v2, v1 +; SI-SDAG-NEXT: v_ldexp_f32_e32 v1, v1, v2 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc ; SI-SDAG-NEXT: s_setpc_b64 s[30:31] ; @@ -5561,13 +5577,13 @@ ; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0 ; SI-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1 ; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f -; SI-GISEL-NEXT: v_rndne_f32_e32 v1, v1 ; SI-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2 -; SI-GISEL-NEXT: v_mad_f32 v3, v0, s4, -v1 -; SI-GISEL-NEXT: v_add_f32_e32 v2, v3, v2 -; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v1, v1 -; SI-GISEL-NEXT: v_exp_f32_e32 v2, v2 -; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v2, v1 +; SI-GISEL-NEXT: v_rndne_f32_e32 v3, v1 +; SI-GISEL-NEXT: v_sub_f32_e32 v1, v1, v3 +; SI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 +; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3 +; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1 +; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 ; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 ; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc @@ -5622,14 +5638,15 @@ ; VI-SDAG-NEXT: v_sub_f32_e32 v4, v0, v1 ; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v1 ; VI-SDAG-NEXT: v_mul_f32_e32 v5, 0x39a3b295, v4 -; VI-SDAG-NEXT: s_mov_b32 s4, 0x3fb8a000 -; VI-SDAG-NEXT: v_rndne_f32_e32 v2, v2 -; VI-SDAG-NEXT: v_mac_f32_e32 v5, 0x3fb8a000, v4 -; VI-SDAG-NEXT: v_mad_f32 v3, v1, s4, -v2 -; VI-SDAG-NEXT: v_mac_f32_e32 v5, 0x39a3b295, v1 -; VI-SDAG-NEXT: v_add_f32_e32 v1, v3, v5 +; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3fb8a000, v4 +; VI-SDAG-NEXT: v_rndne_f32_e32 v3, v2 +; VI-SDAG-NEXT: v_add_f32_e32 v4, v4, v5 +; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1 +; VI-SDAG-NEXT: v_sub_f32_e32 v2, v2, v3 +; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v4 +; VI-SDAG-NEXT: v_add_f32_e32 v1, v2, v1 ; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1 -; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2 +; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v3 ; VI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0 ; VI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 ; VI-SDAG-NEXT: s_mov_b32 s4, 0x42b17218 @@ -5645,14 +5662,15 @@ ; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 ; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1 -; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3fb8a000, v1 ; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x39a3b295, v2 -; VI-GISEL-NEXT: s_mov_b32 s4, 0x3fb8a000 -; VI-GISEL-NEXT: v_mac_f32_e32 v4, 0x3fb8a000, v2 +; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v2 +; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3fb8a000, v1 +; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v4 +; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1 +; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 ; VI-GISEL-NEXT: v_rndne_f32_e32 v2, v3 -; VI-GISEL-NEXT: v_mac_f32_e32 v4, 0x39a3b295, v1 -; VI-GISEL-NEXT: v_mad_f32 v1, v1, s4, -v2 -; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v4 +; VI-GISEL-NEXT: v_sub_f32_e32 v3, v3, v2 +; VI-GISEL-NEXT: v_add_f32_e32 v1, v3, v1 ; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v2 ; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1 ; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 @@ -5668,20 +5686,20 @@ ; GFX900-SDAG-LABEL: v_exp_f32_dynamic_mode: ; GFX900-SDAG: ; %bb.0: ; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b ; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0 -; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1 -; GFX900-SDAG-NEXT: s_mov_b32 s5, 0x32a5705f -; GFX900-SDAG-NEXT: v_rndne_f32_e32 v1, v1 -; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s5, v2 -; GFX900-SDAG-NEXT: v_mad_f32 v3, v0, s4, -v1 -; GFX900-SDAG-NEXT: v_add_f32_e32 v2, v3, v2 -; GFX900-SDAG-NEXT: v_exp_f32_e32 v2, v2 -; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v1, v1 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b +; GFX900-SDAG-NEXT: v_rndne_f32_e32 v2, v1 +; GFX900-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2 +; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x32a5705f +; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1 +; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v3, v1 +; GFX900-SDAG-NEXT: v_exp_f32_e32 v1, v1 +; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0 ; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x42b17218 -; GFX900-SDAG-NEXT: v_ldexp_f32 v1, v2, v1 +; GFX900-SDAG-NEXT: v_ldexp_f32 v1, v1, v2 ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc ; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000 ; GFX900-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0 @@ -5695,14 +5713,14 @@ ; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0 ; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f -; GFX900-GISEL-NEXT: v_rndne_f32_e32 v1, v1 ; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2 -; GFX900-GISEL-NEXT: v_mad_f32 v3, v0, s4, -v1 -; GFX900-GISEL-NEXT: v_add_f32_e32 v2, v3, v2 -; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v1, v1 -; GFX900-GISEL-NEXT: v_exp_f32_e32 v2, v2 +; GFX900-GISEL-NEXT: v_rndne_f32_e32 v3, v1 +; GFX900-GISEL-NEXT: v_sub_f32_e32 v1, v1, v3 +; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 +; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3 +; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 -; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v2, v1 +; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 ; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218 @@ -5714,20 +5732,20 @@ ; SI-SDAG-LABEL: v_exp_f32_dynamic_mode: ; SI-SDAG: ; %bb.0: ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b ; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0 -; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1 -; SI-SDAG-NEXT: s_mov_b32 s5, 0x32a5705f -; SI-SDAG-NEXT: v_rndne_f32_e32 v1, v1 -; SI-SDAG-NEXT: v_fma_f32 v2, v0, s5, v2 -; SI-SDAG-NEXT: v_mad_f32 v3, v0, s4, -v1 -; SI-SDAG-NEXT: v_add_f32_e32 v2, v3, v2 -; SI-SDAG-NEXT: v_exp_f32_e32 v2, v2 -; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v1, v1 +; SI-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b +; SI-SDAG-NEXT: v_rndne_f32_e32 v2, v1 +; SI-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2 +; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1 +; SI-SDAG-NEXT: s_mov_b32 s4, 0x32a5705f +; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1 +; SI-SDAG-NEXT: v_add_f32_e32 v1, v3, v1 +; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1 +; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2 ; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0 ; SI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x42b17218 -; SI-SDAG-NEXT: v_ldexp_f32_e32 v1, v2, v1 +; SI-SDAG-NEXT: v_ldexp_f32_e32 v1, v1, v2 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc ; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000 ; SI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0 @@ -5741,14 +5759,14 @@ ; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0 ; SI-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1 ; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f -; SI-GISEL-NEXT: v_rndne_f32_e32 v1, v1 ; SI-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2 -; SI-GISEL-NEXT: v_mad_f32 v3, v0, s4, -v1 -; SI-GISEL-NEXT: v_add_f32_e32 v2, v3, v2 -; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v1, v1 -; SI-GISEL-NEXT: v_exp_f32_e32 v2, v2 +; SI-GISEL-NEXT: v_rndne_f32_e32 v3, v1 +; SI-GISEL-NEXT: v_sub_f32_e32 v1, v1, v3 +; SI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 +; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3 +; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1 ; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 -; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v2, v1 +; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 ; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218 @@ -6499,19 +6517,20 @@ ; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; VI-SDAG-NEXT: v_add_f16_e32 v0, v0, v1 ; VI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 -; VI-SDAG-NEXT: s_mov_b32 s4, 0x3fb8a000 +; VI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0 ; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 ; VI-SDAG-NEXT: v_sub_f32_e32 v2, v0, v1 -; VI-SDAG-NEXT: v_mul_f32_e32 v3, 0x3fb8a000, v1 ; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x39a3b295, v2 -; VI-SDAG-NEXT: v_mac_f32_e32 v4, 0x3fb8a000, v2 -; VI-SDAG-NEXT: v_rndne_f32_e32 v2, v3 -; VI-SDAG-NEXT: v_mac_f32_e32 v4, 0x39a3b295, v1 -; VI-SDAG-NEXT: v_mad_f32 v1, v1, s4, -v2 -; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v4 -; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2 +; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v2 +; VI-SDAG-NEXT: v_mul_f32_e32 v3, 0x39a3b295, v1 +; VI-SDAG-NEXT: v_add_f32_e32 v2, v2, v4 +; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8a000, v1 +; VI-SDAG-NEXT: v_add_f32_e32 v2, v3, v2 +; VI-SDAG-NEXT: v_rndne_f32_e32 v3, v1 +; VI-SDAG-NEXT: v_sub_f32_e32 v1, v1, v3 +; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 ; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1 -; VI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0 +; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v3 ; VI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 ; VI-SDAG-NEXT: s_mov_b32 s4, 0x42b17218 ; VI-SDAG-NEXT: v_ldexp_f32 v1, v1, v2 @@ -6526,16 +6545,17 @@ ; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; VI-GISEL-NEXT: v_add_f16_e32 v0, v0, v1 ; VI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 -; VI-GISEL-NEXT: s_mov_b32 s4, 0x3fb8a000 ; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 ; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1 -; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3fb8a000, v1 ; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x39a3b295, v2 -; VI-GISEL-NEXT: v_mac_f32_e32 v4, 0x3fb8a000, v2 +; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v2 +; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3fb8a000, v1 +; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v4 +; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1 +; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 ; VI-GISEL-NEXT: v_rndne_f32_e32 v2, v3 -; VI-GISEL-NEXT: v_mac_f32_e32 v4, 0x39a3b295, v1 -; VI-GISEL-NEXT: v_mad_f32 v1, v1, s4, -v2 -; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v4 +; VI-GISEL-NEXT: v_sub_f32_e32 v3, v3, v2 +; VI-GISEL-NEXT: v_add_f32_e32 v1, v3, v1 ; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v2 ; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1 ; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 @@ -6552,50 +6572,50 @@ ; GFX900-SDAG: ; %bb.0: ; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-SDAG-NEXT: v_add_f16_e32 v0, v0, v1 -; GFX900-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v0 +; GFX900-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b ; GFX900-SDAG-NEXT: s_mov_b32 s5, 0x32a5705f -; GFX900-SDAG-NEXT: v_mul_f32_e32 v2, 0x3fb8aa3b, v1 -; GFX900-SDAG-NEXT: v_fma_f32 v3, v1, s4, -v2 -; GFX900-SDAG-NEXT: v_rndne_f32_e32 v2, v2 -; GFX900-SDAG-NEXT: v_fma_f32 v3, v1, s5, v3 -; GFX900-SDAG-NEXT: v_mad_mix_f32 v0, v0, s4, -v2 op_sel_hi:[1,0,0] -; GFX900-SDAG-NEXT: v_add_f32_e32 v0, v0, v3 -; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2 -; GFX900-SDAG-NEXT: v_exp_f32_e32 v0, v0 +; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0 +; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1 +; GFX900-SDAG-NEXT: v_rndne_f32_e32 v3, v1 +; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s5, v2 +; GFX900-SDAG-NEXT: v_sub_f32_e32 v1, v1, v3 +; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 +; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v3 +; GFX900-SDAG-NEXT: v_exp_f32_e32 v1, v1 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0 -; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v1 +; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x42b17218 -; GFX900-SDAG-NEXT: v_ldexp_f32 v0, v0, v2 -; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc +; GFX900-SDAG-NEXT: v_ldexp_f32 v1, v1, v2 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc ; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000 -; GFX900-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v1 -; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc +; GFX900-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc ; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; GFX900-GISEL-LABEL: v_exp_f32_from_fpext_math_f16_daz: ; GFX900-GISEL: ; %bb.0: ; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-GISEL-NEXT: v_add_f16_e32 v0, v0, v1 -; GFX900-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v0 +; GFX900-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 ; GFX900-GISEL-NEXT: s_mov_b32 s4, 0x3fb8aa3b -; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x32a5705f -; GFX900-GISEL-NEXT: v_mul_f32_e32 v3, 0x3fb8aa3b, v1 -; GFX900-GISEL-NEXT: v_fma_f32 v4, v1, s4, -v3 -; GFX900-GISEL-NEXT: v_rndne_f32_e32 v3, v3 -; GFX900-GISEL-NEXT: v_fma_f32 v2, v1, v2, v4 -; GFX900-GISEL-NEXT: v_mad_mix_f32 v0, v0, s4, -v3 op_sel_hi:[1,0,0] -; GFX900-GISEL-NEXT: v_add_f32_e32 v0, v0, v2 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x32a5705f +; GFX900-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8aa3b, v0 +; GFX900-GISEL-NEXT: v_fma_f32 v3, v0, s4, -v2 +; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, v1, v3 +; GFX900-GISEL-NEXT: v_rndne_f32_e32 v3, v2 +; GFX900-GISEL-NEXT: v_sub_f32_e32 v2, v2, v3 +; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v2, v1 ; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3 -; GFX900-GISEL-NEXT: v_exp_f32_e32 v0, v0 +; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 -; GFX900-GISEL-NEXT: v_ldexp_f32 v0, v0, v2 +; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 -; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v1, v2 +; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218 -; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc -; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v1, v2 -; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc +; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc +; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; SI-SDAG-LABEL: v_exp_f32_from_fpext_math_f16_daz: @@ -6604,20 +6624,20 @@ ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b -; SI-SDAG-NEXT: s_mov_b32 s5, 0x32a5705f ; SI-SDAG-NEXT: v_add_f32_e32 v0, v0, v1 ; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0 ; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1 -; SI-SDAG-NEXT: v_rndne_f32_e32 v1, v1 -; SI-SDAG-NEXT: v_fma_f32 v2, v0, s5, v2 -; SI-SDAG-NEXT: v_mad_f32 v3, v0, s4, -v1 -; SI-SDAG-NEXT: v_add_f32_e32 v2, v3, v2 -; SI-SDAG-NEXT: v_exp_f32_e32 v2, v2 -; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v1, v1 +; SI-SDAG-NEXT: s_mov_b32 s4, 0x32a5705f +; SI-SDAG-NEXT: v_rndne_f32_e32 v3, v1 +; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, v2 +; SI-SDAG-NEXT: v_sub_f32_e32 v1, v1, v3 +; SI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 +; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1 +; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v3 ; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0 ; SI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x42b17218 -; SI-SDAG-NEXT: v_ldexp_f32_e32 v1, v2, v1 +; SI-SDAG-NEXT: v_ldexp_f32_e32 v1, v1, v2 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc ; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000 ; SI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0 @@ -6630,7 +6650,6 @@ ; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 ; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1 ; SI-GISEL-NEXT: s_mov_b32 s4, 0x3fb8aa3b -; SI-GISEL-NEXT: v_mov_b32_e32 v4, 0x42b17218 ; SI-GISEL-NEXT: v_mov_b32_e32 v5, 0x7f800000 ; SI-GISEL-NEXT: v_add_f32_e32 v0, v0, v1 ; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 @@ -6638,15 +6657,16 @@ ; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 ; SI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8aa3b, v0 ; SI-GISEL-NEXT: v_fma_f32 v3, v0, s4, -v2 -; SI-GISEL-NEXT: v_rndne_f32_e32 v2, v2 +; SI-GISEL-NEXT: v_rndne_f32_e32 v4, v2 ; SI-GISEL-NEXT: v_fma_f32 v1, v0, v1, v3 -; SI-GISEL-NEXT: v_mad_f32 v3, v0, s4, -v2 -; SI-GISEL-NEXT: v_add_f32_e32 v1, v3, v1 -; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v2 +; SI-GISEL-NEXT: v_sub_f32_e32 v2, v2, v4 +; SI-GISEL-NEXT: v_add_f32_e32 v1, v2, v1 +; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v3, v4 ; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1 -; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0xc2ce8ed0 -; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v3 -; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2 +; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 +; SI-GISEL-NEXT: v_mov_b32_e32 v4, 0x42b17218 +; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 +; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v3 ; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc ; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v4 ; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v5, vcc Index: llvm/test/CodeGen/AMDGPU/llvm.log.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/llvm.log.ll +++ llvm/test/CodeGen/AMDGPU/llvm.log.ll @@ -3347,9 +3347,9 @@ ; SI-SDAG-NEXT: s_mov_b32 s5, 0x3377d1cf ; SI-SDAG-NEXT: s_mov_b32 s6, 0x7f800000 ; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0 -; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1 -; SI-SDAG-NEXT: v_fma_f32 v1, v0, s5, v1 -; SI-SDAG-NEXT: v_mac_f32_e32 v1, 0x3f317217, v0 +; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1 +; SI-SDAG-NEXT: v_fma_f32 v2, v0, s5, v2 +; SI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 ; SI-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s6 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc ; SI-SDAG-NEXT: s_setpc_b64 s[30:31] @@ -3362,9 +3362,9 @@ ; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x3377d1cf ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000 ; SI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3f317217, v0 -; SI-GISEL-NEXT: v_fma_f32 v3, v0, s4, -v3 -; SI-GISEL-NEXT: v_fma_f32 v1, v0, v1, v3 -; SI-GISEL-NEXT: v_mac_f32_e32 v1, 0x3f317217, v0 +; SI-GISEL-NEXT: v_fma_f32 v4, v0, s4, -v3 +; SI-GISEL-NEXT: v_fma_f32 v1, v0, v1, v4 +; SI-GISEL-NEXT: v_add_f32_e32 v1, v3, v1 ; SI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v2 ; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc ; SI-GISEL-NEXT: s_setpc_b64 s[30:31] @@ -3375,28 +3375,34 @@ ; VI-SDAG-NEXT: v_log_f32_e32 v0, v0 ; VI-SDAG-NEXT: s_mov_b32 s4, 0x7f800000 ; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 -; VI-SDAG-NEXT: v_sub_f32_e32 v2, v0, v1 -; VI-SDAG-NEXT: v_mul_f32_e32 v3, 0x3805fdf4, v2 -; VI-SDAG-NEXT: v_mac_f32_e32 v3, 0x3805fdf4, v1 -; VI-SDAG-NEXT: v_mac_f32_e32 v3, 0x3f317000, v2 -; VI-SDAG-NEXT: v_mac_f32_e32 v3, 0x3f317000, v1 +; VI-SDAG-NEXT: v_sub_f32_e32 v3, v0, v1 +; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3805fdf4, v1 +; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3805fdf4, v3 +; VI-SDAG-NEXT: v_mul_f32_e32 v3, 0x3f317000, v3 +; VI-SDAG-NEXT: v_add_f32_e32 v2, v2, v4 +; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317000, v1 +; VI-SDAG-NEXT: v_add_f32_e32 v2, v3, v2 +; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 ; VI-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4 -; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc +; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc ; VI-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; VI-GISEL-LABEL: v_log_f32_daz: ; VI-GISEL: ; %bb.0: ; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; VI-GISEL-NEXT: v_log_f32_e32 v0, v0 -; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x7f800000 -; VI-GISEL-NEXT: v_and_b32_e32 v2, 0xfffff000, v0 -; VI-GISEL-NEXT: v_sub_f32_e32 v3, v0, v2 -; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x3805fdf4, v3 -; VI-GISEL-NEXT: v_mac_f32_e32 v4, 0x3805fdf4, v2 -; VI-GISEL-NEXT: v_mac_f32_e32 v4, 0x3f317000, v3 -; VI-GISEL-NEXT: v_mac_f32_e32 v4, 0x3f317000, v2 -; VI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v1 -; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc +; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 +; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1 +; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3805fdf4, v1 +; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x3805fdf4, v2 +; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3f317000, v2 +; VI-GISEL-NEXT: v_add_f32_e32 v3, v3, v4 +; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317000, v1 +; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v3 +; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 +; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000 +; VI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v2 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc ; VI-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX900-SDAG-LABEL: v_log_f32_daz: @@ -3407,9 +3413,9 @@ ; GFX900-SDAG-NEXT: s_mov_b32 s5, 0x3377d1cf ; GFX900-SDAG-NEXT: s_mov_b32 s6, 0x7f800000 ; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0 -; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1 -; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s5, v1 -; GFX900-SDAG-NEXT: v_mac_f32_e32 v1, 0x3f317217, v0 +; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1 +; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s5, v2 +; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 ; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s6 ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc ; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] @@ -3422,9 +3428,9 @@ ; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x3377d1cf ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000 ; GFX900-GISEL-NEXT: v_mul_f32_e32 v3, 0x3f317217, v0 -; GFX900-GISEL-NEXT: v_fma_f32 v3, v0, s4, -v3 -; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, v1, v3 -; GFX900-GISEL-NEXT: v_mac_f32_e32 v1, 0x3f317217, v0 +; GFX900-GISEL-NEXT: v_fma_f32 v4, v0, s4, -v3 +; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, v1, v4 +; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v3, v1 ; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v2 ; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] @@ -3687,9 +3693,9 @@ ; SI-SDAG-NEXT: s_mov_b32 s5, 0x3377d1cf ; SI-SDAG-NEXT: s_mov_b32 s6, 0x7f800000 ; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0 -; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1 -; SI-SDAG-NEXT: v_fma_f32 v1, v0, s5, v1 -; SI-SDAG-NEXT: v_mac_f32_e32 v1, 0x3f317217, v0 +; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1 +; SI-SDAG-NEXT: v_fma_f32 v2, v0, s5, v2 +; SI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 ; SI-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s6 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc ; SI-SDAG-NEXT: s_setpc_b64 s[30:31] @@ -3702,9 +3708,9 @@ ; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x3377d1cf ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000 ; SI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3f317217, v0 -; SI-GISEL-NEXT: v_fma_f32 v3, v0, s4, -v3 -; SI-GISEL-NEXT: v_fma_f32 v1, v0, v1, v3 -; SI-GISEL-NEXT: v_mac_f32_e32 v1, 0x3f317217, v0 +; SI-GISEL-NEXT: v_fma_f32 v4, v0, s4, -v3 +; SI-GISEL-NEXT: v_fma_f32 v1, v0, v1, v4 +; SI-GISEL-NEXT: v_add_f32_e32 v1, v3, v1 ; SI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v2 ; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc ; SI-GISEL-NEXT: s_setpc_b64 s[30:31] @@ -3715,28 +3721,34 @@ ; VI-SDAG-NEXT: v_log_f32_e32 v0, v0 ; VI-SDAG-NEXT: s_mov_b32 s4, 0x7f800000 ; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 -; VI-SDAG-NEXT: v_sub_f32_e32 v2, v0, v1 -; VI-SDAG-NEXT: v_mul_f32_e32 v3, 0x3805fdf4, v2 -; VI-SDAG-NEXT: v_mac_f32_e32 v3, 0x3805fdf4, v1 -; VI-SDAG-NEXT: v_mac_f32_e32 v3, 0x3f317000, v2 -; VI-SDAG-NEXT: v_mac_f32_e32 v3, 0x3f317000, v1 +; VI-SDAG-NEXT: v_sub_f32_e32 v3, v0, v1 +; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3805fdf4, v1 +; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3805fdf4, v3 +; VI-SDAG-NEXT: v_mul_f32_e32 v3, 0x3f317000, v3 +; VI-SDAG-NEXT: v_add_f32_e32 v2, v2, v4 +; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317000, v1 +; VI-SDAG-NEXT: v_add_f32_e32 v2, v3, v2 +; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 ; VI-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4 -; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc +; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc ; VI-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; VI-GISEL-LABEL: v_log_f32_nnan_daz: ; VI-GISEL: ; %bb.0: ; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; VI-GISEL-NEXT: v_log_f32_e32 v0, v0 -; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x7f800000 -; VI-GISEL-NEXT: v_and_b32_e32 v2, 0xfffff000, v0 -; VI-GISEL-NEXT: v_sub_f32_e32 v3, v0, v2 -; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x3805fdf4, v3 -; VI-GISEL-NEXT: v_mac_f32_e32 v4, 0x3805fdf4, v2 -; VI-GISEL-NEXT: v_mac_f32_e32 v4, 0x3f317000, v3 -; VI-GISEL-NEXT: v_mac_f32_e32 v4, 0x3f317000, v2 -; VI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v1 -; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc +; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 +; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1 +; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3805fdf4, v1 +; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x3805fdf4, v2 +; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3f317000, v2 +; VI-GISEL-NEXT: v_add_f32_e32 v3, v3, v4 +; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317000, v1 +; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v3 +; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 +; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000 +; VI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v2 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc ; VI-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX900-SDAG-LABEL: v_log_f32_nnan_daz: @@ -3747,9 +3759,9 @@ ; GFX900-SDAG-NEXT: s_mov_b32 s5, 0x3377d1cf ; GFX900-SDAG-NEXT: s_mov_b32 s6, 0x7f800000 ; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0 -; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1 -; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s5, v1 -; GFX900-SDAG-NEXT: v_mac_f32_e32 v1, 0x3f317217, v0 +; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1 +; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s5, v2 +; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 ; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s6 ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc ; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] @@ -3762,9 +3774,9 @@ ; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x3377d1cf ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000 ; GFX900-GISEL-NEXT: v_mul_f32_e32 v3, 0x3f317217, v0 -; GFX900-GISEL-NEXT: v_fma_f32 v3, v0, s4, -v3 -; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, v1, v3 -; GFX900-GISEL-NEXT: v_mac_f32_e32 v1, 0x3f317217, v0 +; GFX900-GISEL-NEXT: v_fma_f32 v4, v0, s4, -v3 +; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, v1, v4 +; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v3, v1 ; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v2 ; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] @@ -3826,11 +3838,11 @@ ; SI-SDAG-NEXT: v_log_f32_e32 v0, v0 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x3f317217 ; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0 -; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1 +; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x3377d1cf -; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1 +; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, v2 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x7f800000 -; SI-SDAG-NEXT: v_mac_f32_e32 v1, 0x3f317217, v0 +; SI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 ; SI-SDAG-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s4 ; SI-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5] ; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x41b17218 @@ -3848,12 +3860,12 @@ ; SI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 ; SI-GISEL-NEXT: v_log_f32_e32 v0, v0 ; SI-GISEL-NEXT: s_mov_b32 s4, 0x3f317217 -; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x3377d1cf +; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x3377d1cf ; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0 -; SI-GISEL-NEXT: v_fma_f32 v1, v0, s4, -v1 -; SI-GISEL-NEXT: v_fma_f32 v1, v0, v2, v1 +; SI-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1 +; SI-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2 +; SI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000 -; SI-GISEL-NEXT: v_mac_f32_e32 v1, 0x3f317217, v0 ; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2 ; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5] ; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x41b17218 @@ -3873,13 +3885,16 @@ ; VI-SDAG-NEXT: s_mov_b32 s4, 0x7f800000 ; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 ; VI-SDAG-NEXT: v_sub_f32_e32 v2, v0, v1 -; VI-SDAG-NEXT: v_mul_f32_e32 v3, 0x3805fdf4, v2 -; VI-SDAG-NEXT: v_mac_f32_e32 v3, 0x3805fdf4, v1 -; VI-SDAG-NEXT: v_mac_f32_e32 v3, 0x3f317000, v2 -; VI-SDAG-NEXT: v_mac_f32_e32 v3, 0x3f317000, v1 +; VI-SDAG-NEXT: v_mul_f32_e32 v3, 0x3f317000, v2 +; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3805fdf4, v2 +; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3805fdf4, v1 +; VI-SDAG-NEXT: v_add_f32_e32 v2, v4, v2 +; VI-SDAG-NEXT: v_add_f32_e32 v2, v3, v2 +; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317000, v1 +; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 ; VI-SDAG-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s4 +; VI-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5] ; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x41b17218 -; VI-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v3, s[4:5] ; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc ; VI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 ; VI-SDAG-NEXT: s_setpc_b64 s[30:31] @@ -3896,13 +3911,16 @@ ; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 ; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1 ; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3805fdf4, v2 -; VI-GISEL-NEXT: v_mac_f32_e32 v3, 0x3805fdf4, v1 -; VI-GISEL-NEXT: v_mac_f32_e32 v3, 0x3f317000, v2 -; VI-GISEL-NEXT: v_mac_f32_e32 v3, 0x3f317000, v1 -; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x7f800000 -; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v1 +; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x3805fdf4, v1 +; VI-GISEL-NEXT: v_add_f32_e32 v3, v4, v3 +; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3f317000, v2 +; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v3 +; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317000, v1 +; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 +; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000 +; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2 +; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5] ; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x41b17218 -; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v3, s[4:5] ; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc ; VI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 ; VI-GISEL-NEXT: s_setpc_b64 s[30:31] @@ -3918,11 +3936,11 @@ ; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3f317217 ; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0 -; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1 +; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3377d1cf -; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1 +; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s4, v2 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x7f800000 -; GFX900-SDAG-NEXT: v_mac_f32_e32 v1, 0x3f317217, v0 +; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 ; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s4 ; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5] ; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x41b17218 @@ -3940,12 +3958,12 @@ ; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 ; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0 ; GFX900-GISEL-NEXT: s_mov_b32 s4, 0x3f317217 -; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x3377d1cf +; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x3377d1cf ; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0 -; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, s4, -v1 -; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, v2, v1 +; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1 +; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2 +; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000 -; GFX900-GISEL-NEXT: v_mac_f32_e32 v1, 0x3f317217, v0 ; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2 ; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5] ; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x41b17218 @@ -4021,9 +4039,9 @@ ; SI-SDAG-NEXT: s_mov_b32 s5, 0x3377d1cf ; SI-SDAG-NEXT: s_mov_b32 s6, 0x7f800000 ; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0 -; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1 -; SI-SDAG-NEXT: v_fma_f32 v1, v0, s5, v1 -; SI-SDAG-NEXT: v_mac_f32_e32 v1, 0x3f317217, v0 +; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1 +; SI-SDAG-NEXT: v_fma_f32 v2, v0, s5, v2 +; SI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 ; SI-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s6 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc ; SI-SDAG-NEXT: s_setpc_b64 s[30:31] @@ -4036,9 +4054,9 @@ ; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x3377d1cf ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000 ; SI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3f317217, v0 -; SI-GISEL-NEXT: v_fma_f32 v3, v0, s4, -v3 -; SI-GISEL-NEXT: v_fma_f32 v1, v0, v1, v3 -; SI-GISEL-NEXT: v_mac_f32_e32 v1, 0x3f317217, v0 +; SI-GISEL-NEXT: v_fma_f32 v4, v0, s4, -v3 +; SI-GISEL-NEXT: v_fma_f32 v1, v0, v1, v4 +; SI-GISEL-NEXT: v_add_f32_e32 v1, v3, v1 ; SI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v2 ; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc ; SI-GISEL-NEXT: s_setpc_b64 s[30:31] @@ -4049,28 +4067,34 @@ ; VI-SDAG-NEXT: v_log_f32_e32 v0, v0 ; VI-SDAG-NEXT: s_mov_b32 s4, 0x7f800000 ; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 -; VI-SDAG-NEXT: v_sub_f32_e32 v2, v0, v1 -; VI-SDAG-NEXT: v_mul_f32_e32 v3, 0x3805fdf4, v2 -; VI-SDAG-NEXT: v_mac_f32_e32 v3, 0x3805fdf4, v1 -; VI-SDAG-NEXT: v_mac_f32_e32 v3, 0x3f317000, v2 -; VI-SDAG-NEXT: v_mac_f32_e32 v3, 0x3f317000, v1 +; VI-SDAG-NEXT: v_sub_f32_e32 v3, v0, v1 +; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3805fdf4, v1 +; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3805fdf4, v3 +; VI-SDAG-NEXT: v_mul_f32_e32 v3, 0x3f317000, v3 +; VI-SDAG-NEXT: v_add_f32_e32 v2, v2, v4 +; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317000, v1 +; VI-SDAG-NEXT: v_add_f32_e32 v2, v3, v2 +; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 ; VI-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4 -; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc +; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc ; VI-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; VI-GISEL-LABEL: v_log_f32_ninf_daz: ; VI-GISEL: ; %bb.0: ; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; VI-GISEL-NEXT: v_log_f32_e32 v0, v0 -; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x7f800000 -; VI-GISEL-NEXT: v_and_b32_e32 v2, 0xfffff000, v0 -; VI-GISEL-NEXT: v_sub_f32_e32 v3, v0, v2 -; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x3805fdf4, v3 -; VI-GISEL-NEXT: v_mac_f32_e32 v4, 0x3805fdf4, v2 -; VI-GISEL-NEXT: v_mac_f32_e32 v4, 0x3f317000, v3 -; VI-GISEL-NEXT: v_mac_f32_e32 v4, 0x3f317000, v2 -; VI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v1 -; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc +; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 +; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1 +; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3805fdf4, v1 +; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x3805fdf4, v2 +; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3f317000, v2 +; VI-GISEL-NEXT: v_add_f32_e32 v3, v3, v4 +; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317000, v1 +; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v3 +; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 +; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000 +; VI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v2 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc ; VI-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX900-SDAG-LABEL: v_log_f32_ninf_daz: @@ -4081,9 +4105,9 @@ ; GFX900-SDAG-NEXT: s_mov_b32 s5, 0x3377d1cf ; GFX900-SDAG-NEXT: s_mov_b32 s6, 0x7f800000 ; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0 -; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1 -; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s5, v1 -; GFX900-SDAG-NEXT: v_mac_f32_e32 v1, 0x3f317217, v0 +; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1 +; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s5, v2 +; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 ; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s6 ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc ; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] @@ -4096,9 +4120,9 @@ ; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x3377d1cf ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000 ; GFX900-GISEL-NEXT: v_mul_f32_e32 v3, 0x3f317217, v0 -; GFX900-GISEL-NEXT: v_fma_f32 v3, v0, s4, -v3 -; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, v1, v3 -; GFX900-GISEL-NEXT: v_mac_f32_e32 v1, 0x3f317217, v0 +; GFX900-GISEL-NEXT: v_fma_f32 v4, v0, s4, -v3 +; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, v1, v4 +; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v3, v1 ; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v2 ; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] @@ -4160,11 +4184,11 @@ ; SI-SDAG-NEXT: v_log_f32_e32 v0, v0 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x3f317217 ; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0 -; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1 +; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x3377d1cf -; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1 +; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, v2 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x7f800000 -; SI-SDAG-NEXT: v_mac_f32_e32 v1, 0x3f317217, v0 +; SI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 ; SI-SDAG-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s4 ; SI-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5] ; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x41b17218 @@ -4182,12 +4206,12 @@ ; SI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 ; SI-GISEL-NEXT: v_log_f32_e32 v0, v0 ; SI-GISEL-NEXT: s_mov_b32 s4, 0x3f317217 -; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x3377d1cf +; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x3377d1cf ; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0 -; SI-GISEL-NEXT: v_fma_f32 v1, v0, s4, -v1 -; SI-GISEL-NEXT: v_fma_f32 v1, v0, v2, v1 +; SI-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1 +; SI-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2 +; SI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000 -; SI-GISEL-NEXT: v_mac_f32_e32 v1, 0x3f317217, v0 ; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2 ; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5] ; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x41b17218 @@ -4207,13 +4231,16 @@ ; VI-SDAG-NEXT: s_mov_b32 s4, 0x7f800000 ; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 ; VI-SDAG-NEXT: v_sub_f32_e32 v2, v0, v1 -; VI-SDAG-NEXT: v_mul_f32_e32 v3, 0x3805fdf4, v2 -; VI-SDAG-NEXT: v_mac_f32_e32 v3, 0x3805fdf4, v1 -; VI-SDAG-NEXT: v_mac_f32_e32 v3, 0x3f317000, v2 -; VI-SDAG-NEXT: v_mac_f32_e32 v3, 0x3f317000, v1 +; VI-SDAG-NEXT: v_mul_f32_e32 v3, 0x3f317000, v2 +; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3805fdf4, v2 +; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3805fdf4, v1 +; VI-SDAG-NEXT: v_add_f32_e32 v2, v4, v2 +; VI-SDAG-NEXT: v_add_f32_e32 v2, v3, v2 +; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317000, v1 +; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 ; VI-SDAG-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s4 +; VI-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5] ; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x41b17218 -; VI-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v3, s[4:5] ; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc ; VI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 ; VI-SDAG-NEXT: s_setpc_b64 s[30:31] @@ -4230,13 +4257,16 @@ ; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 ; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1 ; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3805fdf4, v2 -; VI-GISEL-NEXT: v_mac_f32_e32 v3, 0x3805fdf4, v1 -; VI-GISEL-NEXT: v_mac_f32_e32 v3, 0x3f317000, v2 -; VI-GISEL-NEXT: v_mac_f32_e32 v3, 0x3f317000, v1 -; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x7f800000 -; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v1 +; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x3805fdf4, v1 +; VI-GISEL-NEXT: v_add_f32_e32 v3, v4, v3 +; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3f317000, v2 +; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v3 +; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317000, v1 +; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 +; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000 +; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2 +; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5] ; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x41b17218 -; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v3, s[4:5] ; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc ; VI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 ; VI-GISEL-NEXT: s_setpc_b64 s[30:31] @@ -4252,11 +4282,11 @@ ; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3f317217 ; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0 -; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1 +; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3377d1cf -; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1 +; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s4, v2 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x7f800000 -; GFX900-SDAG-NEXT: v_mac_f32_e32 v1, 0x3f317217, v0 +; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 ; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s4 ; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5] ; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x41b17218 @@ -4274,12 +4304,12 @@ ; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 ; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0 ; GFX900-GISEL-NEXT: s_mov_b32 s4, 0x3f317217 -; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x3377d1cf +; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x3377d1cf ; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0 -; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, s4, -v1 -; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, v2, v1 +; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1 +; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2 +; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000 -; GFX900-GISEL-NEXT: v_mac_f32_e32 v1, 0x3f317217, v0 ; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2 ; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5] ; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x41b17218 @@ -4530,25 +4560,25 @@ ; SI-SDAG-LABEL: v_log_f32_nnan_ninf_daz: ; SI-SDAG: ; %bb.0: ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-SDAG-NEXT: v_log_f32_e32 v1, v0 +; SI-SDAG-NEXT: v_log_f32_e32 v0, v0 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x3f317217 -; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3f317217, v1 -; SI-SDAG-NEXT: v_fma_f32 v0, v1, s4, -v0 +; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0 +; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x3377d1cf -; SI-SDAG-NEXT: v_fma_f32 v0, v1, s4, v0 -; SI-SDAG-NEXT: v_mac_f32_e32 v0, 0x3f317217, v1 +; SI-SDAG-NEXT: v_fma_f32 v0, v0, s4, v2 +; SI-SDAG-NEXT: v_add_f32_e32 v0, v1, v0 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; SI-GISEL-LABEL: v_log_f32_nnan_ninf_daz: ; SI-GISEL: ; %bb.0: ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-GISEL-NEXT: v_log_f32_e32 v1, v0 +; SI-GISEL-NEXT: v_log_f32_e32 v0, v0 ; SI-GISEL-NEXT: s_mov_b32 s4, 0x3f317217 -; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x3377d1cf -; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3f317217, v1 -; SI-GISEL-NEXT: v_fma_f32 v0, v1, s4, -v0 -; SI-GISEL-NEXT: v_fma_f32 v0, v1, v2, v0 -; SI-GISEL-NEXT: v_mac_f32_e32 v0, 0x3f317217, v1 +; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x3377d1cf +; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0 +; SI-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1 +; SI-GISEL-NEXT: v_fma_f32 v0, v0, v3, v2 +; SI-GISEL-NEXT: v_add_f32_e32 v0, v1, v0 ; SI-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; VI-LABEL: v_log_f32_nnan_ninf_daz: @@ -4556,35 +4586,38 @@ ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; VI-NEXT: v_log_f32_e32 v0, v0 ; VI-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 -; VI-NEXT: v_sub_f32_e32 v2, v0, v1 -; VI-NEXT: v_mul_f32_e32 v0, 0x3805fdf4, v2 -; VI-NEXT: v_mac_f32_e32 v0, 0x3805fdf4, v1 -; VI-NEXT: v_mac_f32_e32 v0, 0x3f317000, v2 -; VI-NEXT: v_mac_f32_e32 v0, 0x3f317000, v1 +; VI-NEXT: v_sub_f32_e32 v0, v0, v1 +; VI-NEXT: v_mul_f32_e32 v2, 0x3805fdf4, v1 +; VI-NEXT: v_mul_f32_e32 v3, 0x3805fdf4, v0 +; VI-NEXT: v_mul_f32_e32 v0, 0x3f317000, v0 +; VI-NEXT: v_add_f32_e32 v2, v2, v3 +; VI-NEXT: v_add_f32_e32 v0, v0, v2 +; VI-NEXT: v_mul_f32_e32 v1, 0x3f317000, v1 +; VI-NEXT: v_add_f32_e32 v0, v1, v0 ; VI-NEXT: s_setpc_b64 s[30:31] ; ; GFX900-SDAG-LABEL: v_log_f32_nnan_ninf_daz: ; GFX900-SDAG: ; %bb.0: ; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX900-SDAG-NEXT: v_log_f32_e32 v1, v0 +; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3f317217 -; GFX900-SDAG-NEXT: v_mul_f32_e32 v0, 0x3f317217, v1 -; GFX900-SDAG-NEXT: v_fma_f32 v0, v1, s4, -v0 +; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0 +; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3377d1cf -; GFX900-SDAG-NEXT: v_fma_f32 v0, v1, s4, v0 -; GFX900-SDAG-NEXT: v_mac_f32_e32 v0, 0x3f317217, v1 +; GFX900-SDAG-NEXT: v_fma_f32 v0, v0, s4, v2 +; GFX900-SDAG-NEXT: v_add_f32_e32 v0, v1, v0 ; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; GFX900-GISEL-LABEL: v_log_f32_nnan_ninf_daz: ; GFX900-GISEL: ; %bb.0: ; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX900-GISEL-NEXT: v_log_f32_e32 v1, v0 +; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0 ; GFX900-GISEL-NEXT: s_mov_b32 s4, 0x3f317217 -; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x3377d1cf -; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, 0x3f317217, v1 -; GFX900-GISEL-NEXT: v_fma_f32 v0, v1, s4, -v0 -; GFX900-GISEL-NEXT: v_fma_f32 v0, v1, v2, v0 -; GFX900-GISEL-NEXT: v_mac_f32_e32 v0, 0x3f317217, v1 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x3377d1cf +; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0 +; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1 +; GFX900-GISEL-NEXT: v_fma_f32 v0, v0, v3, v2 +; GFX900-GISEL-NEXT: v_add_f32_e32 v0, v1, v0 ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX1100-SDAG-LABEL: v_log_f32_nnan_ninf_daz: @@ -4640,13 +4673,13 @@ ; SI-SDAG-NEXT: v_log_f32_e32 v0, v0 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x3f317217 ; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0 -; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1 +; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x3377d1cf -; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1 -; SI-SDAG-NEXT: v_mac_f32_e32 v1, 0x3f317217, v0 -; SI-SDAG-NEXT: v_mov_b32_e32 v0, 0x41b17218 -; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc -; SI-SDAG-NEXT: v_sub_f32_e32 v0, v1, v0 +; SI-SDAG-NEXT: v_fma_f32 v0, v0, s4, v2 +; SI-SDAG-NEXT: v_add_f32_e32 v0, v1, v0 +; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x41b17218 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; SI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; SI-GISEL-LABEL: v_log_f32_nnan_ninf_dynamic: @@ -4659,14 +4692,14 @@ ; SI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 ; SI-GISEL-NEXT: v_log_f32_e32 v0, v0 ; SI-GISEL-NEXT: s_mov_b32 s4, 0x3f317217 -; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x3377d1cf +; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x3377d1cf ; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0 -; SI-GISEL-NEXT: v_fma_f32 v1, v0, s4, -v1 -; SI-GISEL-NEXT: v_fma_f32 v1, v0, v2, v1 -; SI-GISEL-NEXT: v_mac_f32_e32 v1, 0x3f317217, v0 -; SI-GISEL-NEXT: v_mov_b32_e32 v0, 0x41b17218 -; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc -; SI-GISEL-NEXT: v_sub_f32_e32 v0, v1, v0 +; SI-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1 +; SI-GISEL-NEXT: v_fma_f32 v0, v0, v3, v2 +; SI-GISEL-NEXT: v_add_f32_e32 v0, v1, v0 +; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x41b17218 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; SI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 ; SI-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; VI-SDAG-LABEL: v_log_f32_nnan_ninf_dynamic: @@ -4680,13 +4713,16 @@ ; VI-SDAG-NEXT: v_log_f32_e32 v0, v0 ; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 ; VI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 -; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3805fdf4, v0 -; VI-SDAG-NEXT: v_mac_f32_e32 v2, 0x3805fdf4, v1 -; VI-SDAG-NEXT: v_mac_f32_e32 v2, 0x3f317000, v0 -; VI-SDAG-NEXT: v_mov_b32_e32 v0, 0x41b17218 -; VI-SDAG-NEXT: v_mac_f32_e32 v2, 0x3f317000, v1 -; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc -; VI-SDAG-NEXT: v_sub_f32_e32 v0, v2, v0 +; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3f317000, v0 +; VI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3805fdf4, v0 +; VI-SDAG-NEXT: v_mul_f32_e32 v3, 0x3805fdf4, v1 +; VI-SDAG-NEXT: v_add_f32_e32 v0, v3, v0 +; VI-SDAG-NEXT: v_add_f32_e32 v0, v2, v0 +; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317000, v1 +; VI-SDAG-NEXT: v_add_f32_e32 v0, v1, v0 +; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x41b17218 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; VI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 ; VI-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; VI-GISEL-LABEL: v_log_f32_nnan_ninf_dynamic: @@ -4701,12 +4737,15 @@ ; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 ; VI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 ; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3805fdf4, v0 -; VI-GISEL-NEXT: v_mac_f32_e32 v2, 0x3805fdf4, v1 -; VI-GISEL-NEXT: v_mac_f32_e32 v2, 0x3f317000, v0 -; VI-GISEL-NEXT: v_mov_b32_e32 v0, 0x41b17218 -; VI-GISEL-NEXT: v_mac_f32_e32 v2, 0x3f317000, v1 -; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc -; VI-GISEL-NEXT: v_sub_f32_e32 v0, v2, v0 +; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3805fdf4, v1 +; VI-GISEL-NEXT: v_add_f32_e32 v2, v3, v2 +; VI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3f317000, v0 +; VI-GISEL-NEXT: v_add_f32_e32 v0, v0, v2 +; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317000, v1 +; VI-GISEL-NEXT: v_add_f32_e32 v0, v1, v0 +; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x41b17218 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; VI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 ; VI-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX900-SDAG-LABEL: v_log_f32_nnan_ninf_dynamic: @@ -4720,13 +4759,13 @@ ; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3f317217 ; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0 -; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1 +; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3377d1cf -; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1 -; GFX900-SDAG-NEXT: v_mac_f32_e32 v1, 0x3f317217, v0 -; GFX900-SDAG-NEXT: v_mov_b32_e32 v0, 0x41b17218 -; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc -; GFX900-SDAG-NEXT: v_sub_f32_e32 v0, v1, v0 +; GFX900-SDAG-NEXT: v_fma_f32 v0, v0, s4, v2 +; GFX900-SDAG-NEXT: v_add_f32_e32 v0, v1, v0 +; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x41b17218 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX900-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 ; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; GFX900-GISEL-LABEL: v_log_f32_nnan_ninf_dynamic: @@ -4739,14 +4778,14 @@ ; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 ; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0 ; GFX900-GISEL-NEXT: s_mov_b32 s4, 0x3f317217 -; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x3377d1cf +; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x3377d1cf ; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0 -; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, s4, -v1 -; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, v2, v1 -; GFX900-GISEL-NEXT: v_mac_f32_e32 v1, 0x3f317217, v0 -; GFX900-GISEL-NEXT: v_mov_b32_e32 v0, 0x41b17218 -; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc -; GFX900-GISEL-NEXT: v_sub_f32_e32 v0, v1, v0 +; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1 +; GFX900-GISEL-NEXT: v_fma_f32 v0, v0, v3, v2 +; GFX900-GISEL-NEXT: v_add_f32_e32 v0, v1, v0 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x41b17218 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX900-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX1100-SDAG-LABEL: v_log_f32_nnan_ninf_dynamic: @@ -4844,11 +4883,11 @@ ; SI-SDAG-NEXT: v_log_f32_e32 v0, v0 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x3f317217 ; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0 -; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1 +; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x3377d1cf -; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1 +; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, v2 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x7f800000 -; SI-SDAG-NEXT: v_mac_f32_e32 v1, 0x3f317217, v0 +; SI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 ; SI-SDAG-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s4 ; SI-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5] ; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x41b17218 @@ -4866,12 +4905,12 @@ ; SI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 ; SI-GISEL-NEXT: v_log_f32_e32 v0, v0 ; SI-GISEL-NEXT: s_mov_b32 s4, 0x3f317217 -; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x3377d1cf +; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x3377d1cf ; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0 -; SI-GISEL-NEXT: v_fma_f32 v1, v0, s4, -v1 -; SI-GISEL-NEXT: v_fma_f32 v1, v0, v2, v1 +; SI-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1 +; SI-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2 +; SI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000 -; SI-GISEL-NEXT: v_mac_f32_e32 v1, 0x3f317217, v0 ; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2 ; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5] ; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x41b17218 @@ -4891,13 +4930,16 @@ ; VI-SDAG-NEXT: s_mov_b32 s4, 0x7f800000 ; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 ; VI-SDAG-NEXT: v_sub_f32_e32 v2, v0, v1 -; VI-SDAG-NEXT: v_mul_f32_e32 v3, 0x3805fdf4, v2 -; VI-SDAG-NEXT: v_mac_f32_e32 v3, 0x3805fdf4, v1 -; VI-SDAG-NEXT: v_mac_f32_e32 v3, 0x3f317000, v2 -; VI-SDAG-NEXT: v_mac_f32_e32 v3, 0x3f317000, v1 +; VI-SDAG-NEXT: v_mul_f32_e32 v3, 0x3f317000, v2 +; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3805fdf4, v2 +; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3805fdf4, v1 +; VI-SDAG-NEXT: v_add_f32_e32 v2, v4, v2 +; VI-SDAG-NEXT: v_add_f32_e32 v2, v3, v2 +; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317000, v1 +; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 ; VI-SDAG-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s4 +; VI-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5] ; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x41b17218 -; VI-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v3, s[4:5] ; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc ; VI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 ; VI-SDAG-NEXT: s_setpc_b64 s[30:31] @@ -4914,13 +4956,16 @@ ; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 ; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1 ; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3805fdf4, v2 -; VI-GISEL-NEXT: v_mac_f32_e32 v3, 0x3805fdf4, v1 -; VI-GISEL-NEXT: v_mac_f32_e32 v3, 0x3f317000, v2 -; VI-GISEL-NEXT: v_mac_f32_e32 v3, 0x3f317000, v1 -; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x7f800000 -; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v1 +; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x3805fdf4, v1 +; VI-GISEL-NEXT: v_add_f32_e32 v3, v4, v3 +; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3f317000, v2 +; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v3 +; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317000, v1 +; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 +; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000 +; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2 +; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5] ; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x41b17218 -; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v3, s[4:5] ; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc ; VI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 ; VI-GISEL-NEXT: s_setpc_b64 s[30:31] @@ -4936,11 +4981,11 @@ ; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3f317217 ; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0 -; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1 +; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3377d1cf -; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1 +; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s4, v2 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x7f800000 -; GFX900-SDAG-NEXT: v_mac_f32_e32 v1, 0x3f317217, v0 +; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 ; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s4 ; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5] ; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x41b17218 @@ -4958,12 +5003,12 @@ ; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 ; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0 ; GFX900-GISEL-NEXT: s_mov_b32 s4, 0x3f317217 -; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x3377d1cf +; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x3377d1cf ; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0 -; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, s4, -v1 -; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, v2, v1 +; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1 +; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2 +; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000 -; GFX900-GISEL-NEXT: v_mac_f32_e32 v1, 0x3f317217, v0 ; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2 ; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5] ; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x41b17218 Index: llvm/test/CodeGen/AMDGPU/llvm.log10.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/llvm.log10.ll +++ llvm/test/CodeGen/AMDGPU/llvm.log10.ll @@ -3347,9 +3347,9 @@ ; SI-SDAG-NEXT: s_mov_b32 s5, 0x3284fbcf ; SI-SDAG-NEXT: s_mov_b32 s6, 0x7f800000 ; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 -; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1 -; SI-SDAG-NEXT: v_fma_f32 v1, v0, s5, v1 -; SI-SDAG-NEXT: v_mac_f32_e32 v1, 0x3e9a209a, v0 +; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1 +; SI-SDAG-NEXT: v_fma_f32 v2, v0, s5, v2 +; SI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 ; SI-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s6 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc ; SI-SDAG-NEXT: s_setpc_b64 s[30:31] @@ -3362,9 +3362,9 @@ ; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x3284fbcf ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000 ; SI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3e9a209a, v0 -; SI-GISEL-NEXT: v_fma_f32 v3, v0, s4, -v3 -; SI-GISEL-NEXT: v_fma_f32 v1, v0, v1, v3 -; SI-GISEL-NEXT: v_mac_f32_e32 v1, 0x3e9a209a, v0 +; SI-GISEL-NEXT: v_fma_f32 v4, v0, s4, -v3 +; SI-GISEL-NEXT: v_fma_f32 v1, v0, v1, v4 +; SI-GISEL-NEXT: v_add_f32_e32 v1, v3, v1 ; SI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v2 ; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc ; SI-GISEL-NEXT: s_setpc_b64 s[30:31] @@ -3375,28 +3375,34 @@ ; VI-SDAG-NEXT: v_log_f32_e32 v0, v0 ; VI-SDAG-NEXT: s_mov_b32 s4, 0x7f800000 ; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 -; VI-SDAG-NEXT: v_sub_f32_e32 v2, v0, v1 -; VI-SDAG-NEXT: v_mul_f32_e32 v3, 0x369a84fb, v2 -; VI-SDAG-NEXT: v_mac_f32_e32 v3, 0x369a84fb, v1 -; VI-SDAG-NEXT: v_mac_f32_e32 v3, 0x3e9a2000, v2 -; VI-SDAG-NEXT: v_mac_f32_e32 v3, 0x3e9a2000, v1 +; VI-SDAG-NEXT: v_sub_f32_e32 v3, v0, v1 +; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x369a84fb, v1 +; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x369a84fb, v3 +; VI-SDAG-NEXT: v_mul_f32_e32 v3, 0x3e9a2000, v3 +; VI-SDAG-NEXT: v_add_f32_e32 v2, v2, v4 +; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a2000, v1 +; VI-SDAG-NEXT: v_add_f32_e32 v2, v3, v2 +; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 ; VI-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4 -; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc +; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc ; VI-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; VI-GISEL-LABEL: v_log10_f32_daz: ; VI-GISEL: ; %bb.0: ; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; VI-GISEL-NEXT: v_log_f32_e32 v0, v0 -; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x7f800000 -; VI-GISEL-NEXT: v_and_b32_e32 v2, 0xfffff000, v0 -; VI-GISEL-NEXT: v_sub_f32_e32 v3, v0, v2 -; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x369a84fb, v3 -; VI-GISEL-NEXT: v_mac_f32_e32 v4, 0x369a84fb, v2 -; VI-GISEL-NEXT: v_mac_f32_e32 v4, 0x3e9a2000, v3 -; VI-GISEL-NEXT: v_mac_f32_e32 v4, 0x3e9a2000, v2 -; VI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v1 -; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc +; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 +; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1 +; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x369a84fb, v1 +; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x369a84fb, v2 +; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3e9a2000, v2 +; VI-GISEL-NEXT: v_add_f32_e32 v3, v3, v4 +; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a2000, v1 +; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v3 +; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 +; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000 +; VI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v2 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc ; VI-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX900-SDAG-LABEL: v_log10_f32_daz: @@ -3407,9 +3413,9 @@ ; GFX900-SDAG-NEXT: s_mov_b32 s5, 0x3284fbcf ; GFX900-SDAG-NEXT: s_mov_b32 s6, 0x7f800000 ; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 -; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1 -; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s5, v1 -; GFX900-SDAG-NEXT: v_mac_f32_e32 v1, 0x3e9a209a, v0 +; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1 +; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s5, v2 +; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 ; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s6 ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc ; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] @@ -3422,9 +3428,9 @@ ; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x3284fbcf ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000 ; GFX900-GISEL-NEXT: v_mul_f32_e32 v3, 0x3e9a209a, v0 -; GFX900-GISEL-NEXT: v_fma_f32 v3, v0, s4, -v3 -; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, v1, v3 -; GFX900-GISEL-NEXT: v_mac_f32_e32 v1, 0x3e9a209a, v0 +; GFX900-GISEL-NEXT: v_fma_f32 v4, v0, s4, -v3 +; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, v1, v4 +; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v3, v1 ; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v2 ; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] @@ -3687,9 +3693,9 @@ ; SI-SDAG-NEXT: s_mov_b32 s5, 0x3284fbcf ; SI-SDAG-NEXT: s_mov_b32 s6, 0x7f800000 ; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 -; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1 -; SI-SDAG-NEXT: v_fma_f32 v1, v0, s5, v1 -; SI-SDAG-NEXT: v_mac_f32_e32 v1, 0x3e9a209a, v0 +; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1 +; SI-SDAG-NEXT: v_fma_f32 v2, v0, s5, v2 +; SI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 ; SI-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s6 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc ; SI-SDAG-NEXT: s_setpc_b64 s[30:31] @@ -3702,9 +3708,9 @@ ; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x3284fbcf ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000 ; SI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3e9a209a, v0 -; SI-GISEL-NEXT: v_fma_f32 v3, v0, s4, -v3 -; SI-GISEL-NEXT: v_fma_f32 v1, v0, v1, v3 -; SI-GISEL-NEXT: v_mac_f32_e32 v1, 0x3e9a209a, v0 +; SI-GISEL-NEXT: v_fma_f32 v4, v0, s4, -v3 +; SI-GISEL-NEXT: v_fma_f32 v1, v0, v1, v4 +; SI-GISEL-NEXT: v_add_f32_e32 v1, v3, v1 ; SI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v2 ; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc ; SI-GISEL-NEXT: s_setpc_b64 s[30:31] @@ -3715,28 +3721,34 @@ ; VI-SDAG-NEXT: v_log_f32_e32 v0, v0 ; VI-SDAG-NEXT: s_mov_b32 s4, 0x7f800000 ; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 -; VI-SDAG-NEXT: v_sub_f32_e32 v2, v0, v1 -; VI-SDAG-NEXT: v_mul_f32_e32 v3, 0x369a84fb, v2 -; VI-SDAG-NEXT: v_mac_f32_e32 v3, 0x369a84fb, v1 -; VI-SDAG-NEXT: v_mac_f32_e32 v3, 0x3e9a2000, v2 -; VI-SDAG-NEXT: v_mac_f32_e32 v3, 0x3e9a2000, v1 +; VI-SDAG-NEXT: v_sub_f32_e32 v3, v0, v1 +; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x369a84fb, v1 +; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x369a84fb, v3 +; VI-SDAG-NEXT: v_mul_f32_e32 v3, 0x3e9a2000, v3 +; VI-SDAG-NEXT: v_add_f32_e32 v2, v2, v4 +; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a2000, v1 +; VI-SDAG-NEXT: v_add_f32_e32 v2, v3, v2 +; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 ; VI-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4 -; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc +; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc ; VI-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; VI-GISEL-LABEL: v_log10_f32_nnan_daz: ; VI-GISEL: ; %bb.0: ; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; VI-GISEL-NEXT: v_log_f32_e32 v0, v0 -; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x7f800000 -; VI-GISEL-NEXT: v_and_b32_e32 v2, 0xfffff000, v0 -; VI-GISEL-NEXT: v_sub_f32_e32 v3, v0, v2 -; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x369a84fb, v3 -; VI-GISEL-NEXT: v_mac_f32_e32 v4, 0x369a84fb, v2 -; VI-GISEL-NEXT: v_mac_f32_e32 v4, 0x3e9a2000, v3 -; VI-GISEL-NEXT: v_mac_f32_e32 v4, 0x3e9a2000, v2 -; VI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v1 -; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc +; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 +; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1 +; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x369a84fb, v1 +; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x369a84fb, v2 +; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3e9a2000, v2 +; VI-GISEL-NEXT: v_add_f32_e32 v3, v3, v4 +; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a2000, v1 +; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v3 +; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 +; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000 +; VI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v2 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc ; VI-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX900-SDAG-LABEL: v_log10_f32_nnan_daz: @@ -3747,9 +3759,9 @@ ; GFX900-SDAG-NEXT: s_mov_b32 s5, 0x3284fbcf ; GFX900-SDAG-NEXT: s_mov_b32 s6, 0x7f800000 ; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 -; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1 -; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s5, v1 -; GFX900-SDAG-NEXT: v_mac_f32_e32 v1, 0x3e9a209a, v0 +; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1 +; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s5, v2 +; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 ; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s6 ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc ; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] @@ -3762,9 +3774,9 @@ ; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x3284fbcf ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000 ; GFX900-GISEL-NEXT: v_mul_f32_e32 v3, 0x3e9a209a, v0 -; GFX900-GISEL-NEXT: v_fma_f32 v3, v0, s4, -v3 -; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, v1, v3 -; GFX900-GISEL-NEXT: v_mac_f32_e32 v1, 0x3e9a209a, v0 +; GFX900-GISEL-NEXT: v_fma_f32 v4, v0, s4, -v3 +; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, v1, v4 +; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v3, v1 ; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v2 ; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] @@ -3826,11 +3838,11 @@ ; SI-SDAG-NEXT: v_log_f32_e32 v0, v0 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x3e9a209a ; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 -; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1 +; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x3284fbcf -; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1 +; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, v2 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x7f800000 -; SI-SDAG-NEXT: v_mac_f32_e32 v1, 0x3e9a209a, v0 +; SI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 ; SI-SDAG-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s4 ; SI-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5] ; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x411a209b @@ -3848,12 +3860,12 @@ ; SI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 ; SI-GISEL-NEXT: v_log_f32_e32 v0, v0 ; SI-GISEL-NEXT: s_mov_b32 s4, 0x3e9a209a -; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x3284fbcf +; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x3284fbcf ; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 -; SI-GISEL-NEXT: v_fma_f32 v1, v0, s4, -v1 -; SI-GISEL-NEXT: v_fma_f32 v1, v0, v2, v1 +; SI-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1 +; SI-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2 +; SI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000 -; SI-GISEL-NEXT: v_mac_f32_e32 v1, 0x3e9a209a, v0 ; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2 ; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5] ; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x411a209b @@ -3873,13 +3885,16 @@ ; VI-SDAG-NEXT: s_mov_b32 s4, 0x7f800000 ; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 ; VI-SDAG-NEXT: v_sub_f32_e32 v2, v0, v1 -; VI-SDAG-NEXT: v_mul_f32_e32 v3, 0x369a84fb, v2 -; VI-SDAG-NEXT: v_mac_f32_e32 v3, 0x369a84fb, v1 -; VI-SDAG-NEXT: v_mac_f32_e32 v3, 0x3e9a2000, v2 -; VI-SDAG-NEXT: v_mac_f32_e32 v3, 0x3e9a2000, v1 +; VI-SDAG-NEXT: v_mul_f32_e32 v3, 0x3e9a2000, v2 +; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x369a84fb, v2 +; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x369a84fb, v1 +; VI-SDAG-NEXT: v_add_f32_e32 v2, v4, v2 +; VI-SDAG-NEXT: v_add_f32_e32 v2, v3, v2 +; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a2000, v1 +; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 ; VI-SDAG-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s4 +; VI-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5] ; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x411a209b -; VI-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v3, s[4:5] ; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc ; VI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 ; VI-SDAG-NEXT: s_setpc_b64 s[30:31] @@ -3896,13 +3911,16 @@ ; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 ; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1 ; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x369a84fb, v2 -; VI-GISEL-NEXT: v_mac_f32_e32 v3, 0x369a84fb, v1 -; VI-GISEL-NEXT: v_mac_f32_e32 v3, 0x3e9a2000, v2 -; VI-GISEL-NEXT: v_mac_f32_e32 v3, 0x3e9a2000, v1 -; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x7f800000 -; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v1 +; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x369a84fb, v1 +; VI-GISEL-NEXT: v_add_f32_e32 v3, v4, v3 +; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3e9a2000, v2 +; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v3 +; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a2000, v1 +; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 +; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000 +; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2 +; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5] ; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x411a209b -; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v3, s[4:5] ; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc ; VI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 ; VI-GISEL-NEXT: s_setpc_b64 s[30:31] @@ -3918,11 +3936,11 @@ ; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3e9a209a ; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 -; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1 +; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3284fbcf -; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1 +; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s4, v2 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x7f800000 -; GFX900-SDAG-NEXT: v_mac_f32_e32 v1, 0x3e9a209a, v0 +; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 ; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s4 ; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5] ; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x411a209b @@ -3940,12 +3958,12 @@ ; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 ; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0 ; GFX900-GISEL-NEXT: s_mov_b32 s4, 0x3e9a209a -; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x3284fbcf +; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x3284fbcf ; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 -; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, s4, -v1 -; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, v2, v1 +; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1 +; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2 +; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000 -; GFX900-GISEL-NEXT: v_mac_f32_e32 v1, 0x3e9a209a, v0 ; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2 ; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5] ; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x411a209b @@ -4021,9 +4039,9 @@ ; SI-SDAG-NEXT: s_mov_b32 s5, 0x3284fbcf ; SI-SDAG-NEXT: s_mov_b32 s6, 0x7f800000 ; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 -; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1 -; SI-SDAG-NEXT: v_fma_f32 v1, v0, s5, v1 -; SI-SDAG-NEXT: v_mac_f32_e32 v1, 0x3e9a209a, v0 +; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1 +; SI-SDAG-NEXT: v_fma_f32 v2, v0, s5, v2 +; SI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 ; SI-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s6 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc ; SI-SDAG-NEXT: s_setpc_b64 s[30:31] @@ -4036,9 +4054,9 @@ ; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x3284fbcf ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000 ; SI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3e9a209a, v0 -; SI-GISEL-NEXT: v_fma_f32 v3, v0, s4, -v3 -; SI-GISEL-NEXT: v_fma_f32 v1, v0, v1, v3 -; SI-GISEL-NEXT: v_mac_f32_e32 v1, 0x3e9a209a, v0 +; SI-GISEL-NEXT: v_fma_f32 v4, v0, s4, -v3 +; SI-GISEL-NEXT: v_fma_f32 v1, v0, v1, v4 +; SI-GISEL-NEXT: v_add_f32_e32 v1, v3, v1 ; SI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v2 ; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc ; SI-GISEL-NEXT: s_setpc_b64 s[30:31] @@ -4049,28 +4067,34 @@ ; VI-SDAG-NEXT: v_log_f32_e32 v0, v0 ; VI-SDAG-NEXT: s_mov_b32 s4, 0x7f800000 ; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 -; VI-SDAG-NEXT: v_sub_f32_e32 v2, v0, v1 -; VI-SDAG-NEXT: v_mul_f32_e32 v3, 0x369a84fb, v2 -; VI-SDAG-NEXT: v_mac_f32_e32 v3, 0x369a84fb, v1 -; VI-SDAG-NEXT: v_mac_f32_e32 v3, 0x3e9a2000, v2 -; VI-SDAG-NEXT: v_mac_f32_e32 v3, 0x3e9a2000, v1 +; VI-SDAG-NEXT: v_sub_f32_e32 v3, v0, v1 +; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x369a84fb, v1 +; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x369a84fb, v3 +; VI-SDAG-NEXT: v_mul_f32_e32 v3, 0x3e9a2000, v3 +; VI-SDAG-NEXT: v_add_f32_e32 v2, v2, v4 +; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a2000, v1 +; VI-SDAG-NEXT: v_add_f32_e32 v2, v3, v2 +; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 ; VI-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4 -; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc +; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc ; VI-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; VI-GISEL-LABEL: v_log10_f32_ninf_daz: ; VI-GISEL: ; %bb.0: ; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; VI-GISEL-NEXT: v_log_f32_e32 v0, v0 -; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x7f800000 -; VI-GISEL-NEXT: v_and_b32_e32 v2, 0xfffff000, v0 -; VI-GISEL-NEXT: v_sub_f32_e32 v3, v0, v2 -; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x369a84fb, v3 -; VI-GISEL-NEXT: v_mac_f32_e32 v4, 0x369a84fb, v2 -; VI-GISEL-NEXT: v_mac_f32_e32 v4, 0x3e9a2000, v3 -; VI-GISEL-NEXT: v_mac_f32_e32 v4, 0x3e9a2000, v2 -; VI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v1 -; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc +; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 +; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1 +; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x369a84fb, v1 +; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x369a84fb, v2 +; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3e9a2000, v2 +; VI-GISEL-NEXT: v_add_f32_e32 v3, v3, v4 +; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a2000, v1 +; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v3 +; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 +; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000 +; VI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v2 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc ; VI-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX900-SDAG-LABEL: v_log10_f32_ninf_daz: @@ -4081,9 +4105,9 @@ ; GFX900-SDAG-NEXT: s_mov_b32 s5, 0x3284fbcf ; GFX900-SDAG-NEXT: s_mov_b32 s6, 0x7f800000 ; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 -; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1 -; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s5, v1 -; GFX900-SDAG-NEXT: v_mac_f32_e32 v1, 0x3e9a209a, v0 +; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1 +; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s5, v2 +; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 ; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s6 ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc ; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] @@ -4096,9 +4120,9 @@ ; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x3284fbcf ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000 ; GFX900-GISEL-NEXT: v_mul_f32_e32 v3, 0x3e9a209a, v0 -; GFX900-GISEL-NEXT: v_fma_f32 v3, v0, s4, -v3 -; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, v1, v3 -; GFX900-GISEL-NEXT: v_mac_f32_e32 v1, 0x3e9a209a, v0 +; GFX900-GISEL-NEXT: v_fma_f32 v4, v0, s4, -v3 +; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, v1, v4 +; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v3, v1 ; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v2 ; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] @@ -4160,11 +4184,11 @@ ; SI-SDAG-NEXT: v_log_f32_e32 v0, v0 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x3e9a209a ; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 -; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1 +; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x3284fbcf -; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1 +; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, v2 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x7f800000 -; SI-SDAG-NEXT: v_mac_f32_e32 v1, 0x3e9a209a, v0 +; SI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 ; SI-SDAG-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s4 ; SI-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5] ; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x411a209b @@ -4182,12 +4206,12 @@ ; SI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 ; SI-GISEL-NEXT: v_log_f32_e32 v0, v0 ; SI-GISEL-NEXT: s_mov_b32 s4, 0x3e9a209a -; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x3284fbcf +; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x3284fbcf ; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 -; SI-GISEL-NEXT: v_fma_f32 v1, v0, s4, -v1 -; SI-GISEL-NEXT: v_fma_f32 v1, v0, v2, v1 +; SI-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1 +; SI-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2 +; SI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000 -; SI-GISEL-NEXT: v_mac_f32_e32 v1, 0x3e9a209a, v0 ; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2 ; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5] ; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x411a209b @@ -4207,13 +4231,16 @@ ; VI-SDAG-NEXT: s_mov_b32 s4, 0x7f800000 ; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 ; VI-SDAG-NEXT: v_sub_f32_e32 v2, v0, v1 -; VI-SDAG-NEXT: v_mul_f32_e32 v3, 0x369a84fb, v2 -; VI-SDAG-NEXT: v_mac_f32_e32 v3, 0x369a84fb, v1 -; VI-SDAG-NEXT: v_mac_f32_e32 v3, 0x3e9a2000, v2 -; VI-SDAG-NEXT: v_mac_f32_e32 v3, 0x3e9a2000, v1 +; VI-SDAG-NEXT: v_mul_f32_e32 v3, 0x3e9a2000, v2 +; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x369a84fb, v2 +; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x369a84fb, v1 +; VI-SDAG-NEXT: v_add_f32_e32 v2, v4, v2 +; VI-SDAG-NEXT: v_add_f32_e32 v2, v3, v2 +; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a2000, v1 +; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 ; VI-SDAG-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s4 +; VI-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5] ; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x411a209b -; VI-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v3, s[4:5] ; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc ; VI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 ; VI-SDAG-NEXT: s_setpc_b64 s[30:31] @@ -4230,13 +4257,16 @@ ; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 ; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1 ; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x369a84fb, v2 -; VI-GISEL-NEXT: v_mac_f32_e32 v3, 0x369a84fb, v1 -; VI-GISEL-NEXT: v_mac_f32_e32 v3, 0x3e9a2000, v2 -; VI-GISEL-NEXT: v_mac_f32_e32 v3, 0x3e9a2000, v1 -; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x7f800000 -; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v1 +; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x369a84fb, v1 +; VI-GISEL-NEXT: v_add_f32_e32 v3, v4, v3 +; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3e9a2000, v2 +; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v3 +; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a2000, v1 +; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 +; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000 +; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2 +; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5] ; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x411a209b -; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v3, s[4:5] ; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc ; VI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 ; VI-GISEL-NEXT: s_setpc_b64 s[30:31] @@ -4252,11 +4282,11 @@ ; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3e9a209a ; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 -; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1 +; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3284fbcf -; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1 +; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s4, v2 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x7f800000 -; GFX900-SDAG-NEXT: v_mac_f32_e32 v1, 0x3e9a209a, v0 +; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 ; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s4 ; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5] ; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x411a209b @@ -4274,12 +4304,12 @@ ; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 ; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0 ; GFX900-GISEL-NEXT: s_mov_b32 s4, 0x3e9a209a -; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x3284fbcf +; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x3284fbcf ; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 -; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, s4, -v1 -; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, v2, v1 +; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1 +; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2 +; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000 -; GFX900-GISEL-NEXT: v_mac_f32_e32 v1, 0x3e9a209a, v0 ; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2 ; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5] ; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x411a209b @@ -4530,25 +4560,25 @@ ; SI-SDAG-LABEL: v_log10_f32_nnan_ninf_daz: ; SI-SDAG: ; %bb.0: ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-SDAG-NEXT: v_log_f32_e32 v1, v0 +; SI-SDAG-NEXT: v_log_f32_e32 v0, v0 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x3e9a209a -; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3e9a209a, v1 -; SI-SDAG-NEXT: v_fma_f32 v0, v1, s4, -v0 +; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 +; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x3284fbcf -; SI-SDAG-NEXT: v_fma_f32 v0, v1, s4, v0 -; SI-SDAG-NEXT: v_mac_f32_e32 v0, 0x3e9a209a, v1 +; SI-SDAG-NEXT: v_fma_f32 v0, v0, s4, v2 +; SI-SDAG-NEXT: v_add_f32_e32 v0, v1, v0 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; SI-GISEL-LABEL: v_log10_f32_nnan_ninf_daz: ; SI-GISEL: ; %bb.0: ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-GISEL-NEXT: v_log_f32_e32 v1, v0 +; SI-GISEL-NEXT: v_log_f32_e32 v0, v0 ; SI-GISEL-NEXT: s_mov_b32 s4, 0x3e9a209a -; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x3284fbcf -; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3e9a209a, v1 -; SI-GISEL-NEXT: v_fma_f32 v0, v1, s4, -v0 -; SI-GISEL-NEXT: v_fma_f32 v0, v1, v2, v0 -; SI-GISEL-NEXT: v_mac_f32_e32 v0, 0x3e9a209a, v1 +; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x3284fbcf +; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 +; SI-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1 +; SI-GISEL-NEXT: v_fma_f32 v0, v0, v3, v2 +; SI-GISEL-NEXT: v_add_f32_e32 v0, v1, v0 ; SI-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; VI-LABEL: v_log10_f32_nnan_ninf_daz: @@ -4556,35 +4586,38 @@ ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; VI-NEXT: v_log_f32_e32 v0, v0 ; VI-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 -; VI-NEXT: v_sub_f32_e32 v2, v0, v1 -; VI-NEXT: v_mul_f32_e32 v0, 0x369a84fb, v2 -; VI-NEXT: v_mac_f32_e32 v0, 0x369a84fb, v1 -; VI-NEXT: v_mac_f32_e32 v0, 0x3e9a2000, v2 -; VI-NEXT: v_mac_f32_e32 v0, 0x3e9a2000, v1 +; VI-NEXT: v_sub_f32_e32 v0, v0, v1 +; VI-NEXT: v_mul_f32_e32 v2, 0x369a84fb, v1 +; VI-NEXT: v_mul_f32_e32 v3, 0x369a84fb, v0 +; VI-NEXT: v_mul_f32_e32 v0, 0x3e9a2000, v0 +; VI-NEXT: v_add_f32_e32 v2, v2, v3 +; VI-NEXT: v_add_f32_e32 v0, v0, v2 +; VI-NEXT: v_mul_f32_e32 v1, 0x3e9a2000, v1 +; VI-NEXT: v_add_f32_e32 v0, v1, v0 ; VI-NEXT: s_setpc_b64 s[30:31] ; ; GFX900-SDAG-LABEL: v_log10_f32_nnan_ninf_daz: ; GFX900-SDAG: ; %bb.0: ; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX900-SDAG-NEXT: v_log_f32_e32 v1, v0 +; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3e9a209a -; GFX900-SDAG-NEXT: v_mul_f32_e32 v0, 0x3e9a209a, v1 -; GFX900-SDAG-NEXT: v_fma_f32 v0, v1, s4, -v0 +; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 +; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3284fbcf -; GFX900-SDAG-NEXT: v_fma_f32 v0, v1, s4, v0 -; GFX900-SDAG-NEXT: v_mac_f32_e32 v0, 0x3e9a209a, v1 +; GFX900-SDAG-NEXT: v_fma_f32 v0, v0, s4, v2 +; GFX900-SDAG-NEXT: v_add_f32_e32 v0, v1, v0 ; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; GFX900-GISEL-LABEL: v_log10_f32_nnan_ninf_daz: ; GFX900-GISEL: ; %bb.0: ; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX900-GISEL-NEXT: v_log_f32_e32 v1, v0 +; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0 ; GFX900-GISEL-NEXT: s_mov_b32 s4, 0x3e9a209a -; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x3284fbcf -; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, 0x3e9a209a, v1 -; GFX900-GISEL-NEXT: v_fma_f32 v0, v1, s4, -v0 -; GFX900-GISEL-NEXT: v_fma_f32 v0, v1, v2, v0 -; GFX900-GISEL-NEXT: v_mac_f32_e32 v0, 0x3e9a209a, v1 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x3284fbcf +; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 +; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1 +; GFX900-GISEL-NEXT: v_fma_f32 v0, v0, v3, v2 +; GFX900-GISEL-NEXT: v_add_f32_e32 v0, v1, v0 ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX1100-SDAG-LABEL: v_log10_f32_nnan_ninf_daz: @@ -4640,13 +4673,13 @@ ; SI-SDAG-NEXT: v_log_f32_e32 v0, v0 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x3e9a209a ; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 -; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1 +; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x3284fbcf -; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1 -; SI-SDAG-NEXT: v_mac_f32_e32 v1, 0x3e9a209a, v0 -; SI-SDAG-NEXT: v_mov_b32_e32 v0, 0x411a209b -; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc -; SI-SDAG-NEXT: v_sub_f32_e32 v0, v1, v0 +; SI-SDAG-NEXT: v_fma_f32 v0, v0, s4, v2 +; SI-SDAG-NEXT: v_add_f32_e32 v0, v1, v0 +; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x411a209b +; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; SI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; SI-GISEL-LABEL: v_log10_f32_nnan_ninf_dynamic: @@ -4659,14 +4692,14 @@ ; SI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 ; SI-GISEL-NEXT: v_log_f32_e32 v0, v0 ; SI-GISEL-NEXT: s_mov_b32 s4, 0x3e9a209a -; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x3284fbcf +; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x3284fbcf ; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 -; SI-GISEL-NEXT: v_fma_f32 v1, v0, s4, -v1 -; SI-GISEL-NEXT: v_fma_f32 v1, v0, v2, v1 -; SI-GISEL-NEXT: v_mac_f32_e32 v1, 0x3e9a209a, v0 -; SI-GISEL-NEXT: v_mov_b32_e32 v0, 0x411a209b -; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc -; SI-GISEL-NEXT: v_sub_f32_e32 v0, v1, v0 +; SI-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1 +; SI-GISEL-NEXT: v_fma_f32 v0, v0, v3, v2 +; SI-GISEL-NEXT: v_add_f32_e32 v0, v1, v0 +; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x411a209b +; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; SI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 ; SI-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; VI-SDAG-LABEL: v_log10_f32_nnan_ninf_dynamic: @@ -4680,13 +4713,16 @@ ; VI-SDAG-NEXT: v_log_f32_e32 v0, v0 ; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 ; VI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 -; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x369a84fb, v0 -; VI-SDAG-NEXT: v_mac_f32_e32 v2, 0x369a84fb, v1 -; VI-SDAG-NEXT: v_mac_f32_e32 v2, 0x3e9a2000, v0 -; VI-SDAG-NEXT: v_mov_b32_e32 v0, 0x411a209b -; VI-SDAG-NEXT: v_mac_f32_e32 v2, 0x3e9a2000, v1 -; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc -; VI-SDAG-NEXT: v_sub_f32_e32 v0, v2, v0 +; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3e9a2000, v0 +; VI-SDAG-NEXT: v_mul_f32_e32 v0, 0x369a84fb, v0 +; VI-SDAG-NEXT: v_mul_f32_e32 v3, 0x369a84fb, v1 +; VI-SDAG-NEXT: v_add_f32_e32 v0, v3, v0 +; VI-SDAG-NEXT: v_add_f32_e32 v0, v2, v0 +; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a2000, v1 +; VI-SDAG-NEXT: v_add_f32_e32 v0, v1, v0 +; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x411a209b +; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; VI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 ; VI-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; VI-GISEL-LABEL: v_log10_f32_nnan_ninf_dynamic: @@ -4701,12 +4737,15 @@ ; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 ; VI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 ; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x369a84fb, v0 -; VI-GISEL-NEXT: v_mac_f32_e32 v2, 0x369a84fb, v1 -; VI-GISEL-NEXT: v_mac_f32_e32 v2, 0x3e9a2000, v0 -; VI-GISEL-NEXT: v_mov_b32_e32 v0, 0x411a209b -; VI-GISEL-NEXT: v_mac_f32_e32 v2, 0x3e9a2000, v1 -; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc -; VI-GISEL-NEXT: v_sub_f32_e32 v0, v2, v0 +; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x369a84fb, v1 +; VI-GISEL-NEXT: v_add_f32_e32 v2, v3, v2 +; VI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3e9a2000, v0 +; VI-GISEL-NEXT: v_add_f32_e32 v0, v0, v2 +; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a2000, v1 +; VI-GISEL-NEXT: v_add_f32_e32 v0, v1, v0 +; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x411a209b +; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; VI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 ; VI-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX900-SDAG-LABEL: v_log10_f32_nnan_ninf_dynamic: @@ -4720,13 +4759,13 @@ ; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3e9a209a ; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 -; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1 +; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3284fbcf -; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1 -; GFX900-SDAG-NEXT: v_mac_f32_e32 v1, 0x3e9a209a, v0 -; GFX900-SDAG-NEXT: v_mov_b32_e32 v0, 0x411a209b -; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc -; GFX900-SDAG-NEXT: v_sub_f32_e32 v0, v1, v0 +; GFX900-SDAG-NEXT: v_fma_f32 v0, v0, s4, v2 +; GFX900-SDAG-NEXT: v_add_f32_e32 v0, v1, v0 +; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x411a209b +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX900-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 ; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; GFX900-GISEL-LABEL: v_log10_f32_nnan_ninf_dynamic: @@ -4739,14 +4778,14 @@ ; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 ; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0 ; GFX900-GISEL-NEXT: s_mov_b32 s4, 0x3e9a209a -; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x3284fbcf +; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x3284fbcf ; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 -; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, s4, -v1 -; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, v2, v1 -; GFX900-GISEL-NEXT: v_mac_f32_e32 v1, 0x3e9a209a, v0 -; GFX900-GISEL-NEXT: v_mov_b32_e32 v0, 0x411a209b -; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc -; GFX900-GISEL-NEXT: v_sub_f32_e32 v0, v1, v0 +; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1 +; GFX900-GISEL-NEXT: v_fma_f32 v0, v0, v3, v2 +; GFX900-GISEL-NEXT: v_add_f32_e32 v0, v1, v0 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x411a209b +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX900-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX1100-SDAG-LABEL: v_log10_f32_nnan_ninf_dynamic: @@ -4844,11 +4883,11 @@ ; SI-SDAG-NEXT: v_log_f32_e32 v0, v0 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x3e9a209a ; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 -; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1 +; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x3284fbcf -; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1 +; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, v2 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x7f800000 -; SI-SDAG-NEXT: v_mac_f32_e32 v1, 0x3e9a209a, v0 +; SI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 ; SI-SDAG-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s4 ; SI-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5] ; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x411a209b @@ -4866,12 +4905,12 @@ ; SI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 ; SI-GISEL-NEXT: v_log_f32_e32 v0, v0 ; SI-GISEL-NEXT: s_mov_b32 s4, 0x3e9a209a -; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x3284fbcf +; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x3284fbcf ; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 -; SI-GISEL-NEXT: v_fma_f32 v1, v0, s4, -v1 -; SI-GISEL-NEXT: v_fma_f32 v1, v0, v2, v1 +; SI-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1 +; SI-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2 +; SI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000 -; SI-GISEL-NEXT: v_mac_f32_e32 v1, 0x3e9a209a, v0 ; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2 ; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5] ; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x411a209b @@ -4891,13 +4930,16 @@ ; VI-SDAG-NEXT: s_mov_b32 s4, 0x7f800000 ; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 ; VI-SDAG-NEXT: v_sub_f32_e32 v2, v0, v1 -; VI-SDAG-NEXT: v_mul_f32_e32 v3, 0x369a84fb, v2 -; VI-SDAG-NEXT: v_mac_f32_e32 v3, 0x369a84fb, v1 -; VI-SDAG-NEXT: v_mac_f32_e32 v3, 0x3e9a2000, v2 -; VI-SDAG-NEXT: v_mac_f32_e32 v3, 0x3e9a2000, v1 +; VI-SDAG-NEXT: v_mul_f32_e32 v3, 0x3e9a2000, v2 +; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x369a84fb, v2 +; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x369a84fb, v1 +; VI-SDAG-NEXT: v_add_f32_e32 v2, v4, v2 +; VI-SDAG-NEXT: v_add_f32_e32 v2, v3, v2 +; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a2000, v1 +; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 ; VI-SDAG-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s4 +; VI-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5] ; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x411a209b -; VI-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v3, s[4:5] ; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc ; VI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 ; VI-SDAG-NEXT: s_setpc_b64 s[30:31] @@ -4914,13 +4956,16 @@ ; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 ; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1 ; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x369a84fb, v2 -; VI-GISEL-NEXT: v_mac_f32_e32 v3, 0x369a84fb, v1 -; VI-GISEL-NEXT: v_mac_f32_e32 v3, 0x3e9a2000, v2 -; VI-GISEL-NEXT: v_mac_f32_e32 v3, 0x3e9a2000, v1 -; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x7f800000 -; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v1 +; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x369a84fb, v1 +; VI-GISEL-NEXT: v_add_f32_e32 v3, v4, v3 +; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3e9a2000, v2 +; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v3 +; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a2000, v1 +; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 +; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000 +; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2 +; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5] ; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x411a209b -; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v3, s[4:5] ; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc ; VI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 ; VI-GISEL-NEXT: s_setpc_b64 s[30:31] @@ -4936,11 +4981,11 @@ ; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3e9a209a ; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 -; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1 +; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3284fbcf -; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1 +; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s4, v2 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x7f800000 -; GFX900-SDAG-NEXT: v_mac_f32_e32 v1, 0x3e9a209a, v0 +; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 ; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s4 ; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5] ; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x411a209b @@ -4958,12 +5003,12 @@ ; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 ; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0 ; GFX900-GISEL-NEXT: s_mov_b32 s4, 0x3e9a209a -; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x3284fbcf +; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x3284fbcf ; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 -; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, s4, -v1 -; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, v2, v1 +; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1 +; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2 +; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000 -; GFX900-GISEL-NEXT: v_mac_f32_e32 v1, 0x3e9a209a, v0 ; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2 ; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5] ; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x411a209b Index: llvm/test/CodeGen/AMDGPU/v_mac.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/v_mac.ll +++ llvm/test/CodeGen/AMDGPU/v_mac.ll @@ -279,6 +279,34 @@ ret void } +; Need to assume denormal handling is needed for dynamic denormal mode +; GCN-LABEL: {{^}}v_mac_f32_dynamic: +; GCN: v_mul_f32 +; GCN: v_add_f32 +define float @v_mac_f32_dynamic(float %a, float %b, float %c) "denormal-fp-math-f32"="dynamic,dynamic" { + %mul = fmul float %a, %b + %mad = fadd float %mul, %c + ret float %mad +} + +; GCN-LABEL: {{^}}v_mac_f32_dynamic_daz: +; GCN: v_mul_f32 +; GCN: v_add_f32 +define float @v_mac_f32_dynamic_daz(float %a, float %b, float %c) "denormal-fp-math-f32"="preserve-sign,dynamic" { + %mul = fmul float %a, %b + %mad = fadd float %mul, %c + ret float %mad +} + +; GCN-LABEL: {{^}}v_mac_f32_dynamic_ftz: +; GCN: v_mul_f32 +; GCN: v_add_f32 +define float @v_mac_f32_dynamic_ftz(float %a, float %b, float %c) "denormal-fp-math-f32"="dynamic,preserve-sign" { + %mul = fmul float %a, %b + %mad = fadd float %mul, %c + ret float %mad +} + declare i32 @llvm.amdgcn.workitem.id.x() #2 attributes #0 = { nounwind "no-signed-zeros-fp-math"="false" }