Index: llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -2828,6 +2828,8 @@ // q = r + (r^2)/2! + (r^3)/3! + (r^4)/4! + (r^5)/5! // // e^x = (2^m) * ( (2^(j/64)) + q*(2^(j/64)) ) + SDNodeFlags FlagsNoContract = Flags; + FlagsNoContract.setAllowContract(false); SDValue PH, PL; if (Subtarget->hasFastFMAF32()) { @@ -2867,7 +2869,10 @@ } SDValue E = DAG.getNode(ISD::FRINT, SL, VT, PH, Flags); - SDValue PHSubE = DAG.getNode(ISD::FSUB, SL, VT, PH, E, Flags); + + // It is unsafe to contract this fsub into the PH multiply. + SDValue PHSubE = DAG.getNode(ISD::FSUB, SL, VT, PH, E, FlagsNoContract); + SDValue A = DAG.getNode(ISD::FADD, SL, VT, PHSubE, PL, Flags); SDValue IntE = DAG.getNode(ISD::FP_TO_SINT, SL, MVT::i32, E); SDValue Exp2 = DAG.getNode(AMDGPUISD::EXP, SL, VT, A, Flags); Index: llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -3312,7 +3312,7 @@ MachineIRBuilder &B) const { Register Dst = MI.getOperand(0).getReg(); Register X = MI.getOperand(1).getReg(); - unsigned Flags = MI.getFlags(); + const unsigned Flags = MI.getFlags(); MachineFunction &MF = B.getMF(); MachineRegisterInfo &MRI = *B.getMRI(); LLT Ty = MRI.getType(Dst); @@ -3375,7 +3375,7 @@ // q = r + (r^2)/2! + (r^3)/3! + (r^4)/4! + (r^5)/5! // // e^x = (2^m) * ( (2^(j/64)) + q*(2^(j/64)) ) - + const unsigned FlagsNoContract = Flags & ~MachineInstr::FmContract; Register PH, PL; if (ST.hasFastFMAF32()) { @@ -3414,7 +3414,9 @@ } auto E = B.buildFRint(Ty, PH, Flags); - auto PHSubE = B.buildFSub(Ty, PH, E, Flags); + + // It is unsafe to contract this fsub into the PH multiply. 
+ auto PHSubE = B.buildFSub(Ty, PH, E, FlagsNoContract); auto A = B.buildFAdd(Ty, PHSubE, PL, Flags); auto IntE = B.buildFPTOSI(LLT::scalar(32), E); Index: llvm/test/CodeGen/AMDGPU/llvm.exp.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/llvm.exp.ll +++ llvm/test/CodeGen/AMDGPU/llvm.exp.ll @@ -3259,19 +3259,19 @@ ; GFX900-SDAG-LABEL: v_exp_f32_fast: ; GFX900-SDAG: ; %bb.0: ; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b ; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0 -; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1 -; GFX900-SDAG-NEXT: s_mov_b32 s5, 0x32a5705f -; GFX900-SDAG-NEXT: v_rndne_f32_e32 v1, v1 -; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s5, v2 -; GFX900-SDAG-NEXT: v_fma_f32 v3, v0, s4, -v1 -; GFX900-SDAG-NEXT: v_add_f32_e32 v2, v3, v2 -; GFX900-SDAG-NEXT: v_exp_f32_e32 v2, v2 -; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v1, v1 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b +; GFX900-SDAG-NEXT: v_rndne_f32_e32 v2, v1 +; GFX900-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2 +; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x32a5705f +; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1 +; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v3, v1 +; GFX900-SDAG-NEXT: v_exp_f32_e32 v1, v1 +; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0 ; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 -; GFX900-SDAG-NEXT: v_ldexp_f32 v1, v2, v1 +; GFX900-SDAG-NEXT: v_ldexp_f32 v1, v1, v2 ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc ; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] ; @@ -3282,13 +3282,13 @@ ; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0 ; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f -; GFX900-GISEL-NEXT: v_rndne_f32_e32 v1, v1 ; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2 -; GFX900-GISEL-NEXT: v_fma_f32 v3, v0, s4, -v1 -; GFX900-GISEL-NEXT: v_add_f32_e32 v2, v3, v2 
-; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v1, v1 -; GFX900-GISEL-NEXT: v_exp_f32_e32 v2, v2 -; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v2, v1 +; GFX900-GISEL-NEXT: v_rndne_f32_e32 v3, v1 +; GFX900-GISEL-NEXT: v_sub_f32_e32 v1, v1, v3 +; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 +; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3 +; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1 +; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 ; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 ; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc @@ -3297,19 +3297,19 @@ ; SI-SDAG-LABEL: v_exp_f32_fast: ; SI-SDAG: ; %bb.0: ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b ; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0 -; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1 -; SI-SDAG-NEXT: s_mov_b32 s5, 0x32a5705f -; SI-SDAG-NEXT: v_rndne_f32_e32 v1, v1 -; SI-SDAG-NEXT: v_fma_f32 v2, v0, s5, v2 -; SI-SDAG-NEXT: v_fma_f32 v3, v0, s4, -v1 -; SI-SDAG-NEXT: v_add_f32_e32 v2, v3, v2 -; SI-SDAG-NEXT: v_exp_f32_e32 v2, v2 -; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v1, v1 +; SI-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b +; SI-SDAG-NEXT: v_rndne_f32_e32 v2, v1 +; SI-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2 +; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1 +; SI-SDAG-NEXT: s_mov_b32 s4, 0x32a5705f +; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1 +; SI-SDAG-NEXT: v_add_f32_e32 v1, v3, v1 +; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1 +; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2 ; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0 ; SI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 -; SI-SDAG-NEXT: v_ldexp_f32_e32 v1, v2, v1 +; SI-SDAG-NEXT: v_ldexp_f32_e32 v1, v1, v2 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc ; SI-SDAG-NEXT: s_setpc_b64 s[30:31] ; @@ -3320,13 +3320,13 @@ ; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0 ; SI-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1 ; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f -; SI-GISEL-NEXT: v_rndne_f32_e32 v1, v1 ; SI-GISEL-NEXT: v_fma_f32 v2, v0, v3, 
v2 -; SI-GISEL-NEXT: v_fma_f32 v3, v0, s4, -v1 -; SI-GISEL-NEXT: v_add_f32_e32 v2, v3, v2 -; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v1, v1 -; SI-GISEL-NEXT: v_exp_f32_e32 v2, v2 -; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v2, v1 +; SI-GISEL-NEXT: v_rndne_f32_e32 v3, v1 +; SI-GISEL-NEXT: v_sub_f32_e32 v1, v1, v3 +; SI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 +; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3 +; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1 +; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 ; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 ; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc @@ -6466,19 +6466,19 @@ ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b -; SI-SDAG-NEXT: s_mov_b32 s5, 0x32a5705f ; SI-SDAG-NEXT: v_add_f32_e32 v0, v0, v1 ; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0 ; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1 -; SI-SDAG-NEXT: v_rndne_f32_e32 v1, v1 -; SI-SDAG-NEXT: v_fma_f32 v2, v0, s5, v2 -; SI-SDAG-NEXT: v_fma_f32 v3, v0, s4, -v1 -; SI-SDAG-NEXT: v_add_f32_e32 v2, v3, v2 -; SI-SDAG-NEXT: v_exp_f32_e32 v2, v2 -; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v1, v1 +; SI-SDAG-NEXT: s_mov_b32 s4, 0x32a5705f +; SI-SDAG-NEXT: v_rndne_f32_e32 v3, v1 +; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, v2 +; SI-SDAG-NEXT: v_sub_f32_e32 v1, v1, v3 +; SI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 +; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1 +; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v3 ; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0 ; SI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 -; SI-SDAG-NEXT: v_ldexp_f32_e32 v1, v2, v1 +; SI-SDAG-NEXT: v_ldexp_f32_e32 v1, v1, v2 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc ; SI-SDAG-NEXT: s_setpc_b64 s[30:31] ; @@ -7685,20 +7685,20 @@ ; GFX900-SDAG-LABEL: v_exp_f32_contract: ; GFX900-SDAG: ; %bb.0: ; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b ; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0 -; 
GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1 -; GFX900-SDAG-NEXT: s_mov_b32 s5, 0x32a5705f -; GFX900-SDAG-NEXT: v_rndne_f32_e32 v1, v1 -; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s5, v2 -; GFX900-SDAG-NEXT: v_fma_f32 v3, v0, s4, -v1 -; GFX900-SDAG-NEXT: v_add_f32_e32 v2, v3, v2 -; GFX900-SDAG-NEXT: v_exp_f32_e32 v2, v2 -; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v1, v1 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b +; GFX900-SDAG-NEXT: v_rndne_f32_e32 v2, v1 +; GFX900-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2 +; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x32a5705f +; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1 +; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v3, v1 +; GFX900-SDAG-NEXT: v_exp_f32_e32 v1, v1 +; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0 ; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x42b17218 -; GFX900-SDAG-NEXT: v_ldexp_f32 v1, v2, v1 +; GFX900-SDAG-NEXT: v_ldexp_f32 v1, v1, v2 ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc ; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000 ; GFX900-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0 @@ -7712,14 +7712,14 @@ ; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0 ; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f -; GFX900-GISEL-NEXT: v_rndne_f32_e32 v1, v1 ; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2 -; GFX900-GISEL-NEXT: v_fma_f32 v3, v0, s4, -v1 -; GFX900-GISEL-NEXT: v_add_f32_e32 v2, v3, v2 -; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v1, v1 -; GFX900-GISEL-NEXT: v_exp_f32_e32 v2, v2 +; GFX900-GISEL-NEXT: v_rndne_f32_e32 v3, v1 +; GFX900-GISEL-NEXT: v_sub_f32_e32 v1, v1, v3 +; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 +; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3 +; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 -; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v2, v1 +; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 ; GFX900-GISEL-NEXT: 
v_mov_b32_e32 v2, 0xc2ce8ed0 ; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218 @@ -7731,20 +7731,20 @@ ; SI-SDAG-LABEL: v_exp_f32_contract: ; SI-SDAG: ; %bb.0: ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b ; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0 -; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1 -; SI-SDAG-NEXT: s_mov_b32 s5, 0x32a5705f -; SI-SDAG-NEXT: v_rndne_f32_e32 v1, v1 -; SI-SDAG-NEXT: v_fma_f32 v2, v0, s5, v2 -; SI-SDAG-NEXT: v_fma_f32 v3, v0, s4, -v1 -; SI-SDAG-NEXT: v_add_f32_e32 v2, v3, v2 -; SI-SDAG-NEXT: v_exp_f32_e32 v2, v2 -; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v1, v1 +; SI-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b +; SI-SDAG-NEXT: v_rndne_f32_e32 v2, v1 +; SI-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2 +; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1 +; SI-SDAG-NEXT: s_mov_b32 s4, 0x32a5705f +; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1 +; SI-SDAG-NEXT: v_add_f32_e32 v1, v3, v1 +; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1 +; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2 ; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0 ; SI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x42b17218 -; SI-SDAG-NEXT: v_ldexp_f32_e32 v1, v2, v1 +; SI-SDAG-NEXT: v_ldexp_f32_e32 v1, v1, v2 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc ; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000 ; SI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0 @@ -7758,14 +7758,14 @@ ; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0 ; SI-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1 ; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f -; SI-GISEL-NEXT: v_rndne_f32_e32 v1, v1 ; SI-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2 -; SI-GISEL-NEXT: v_fma_f32 v3, v0, s4, -v1 -; SI-GISEL-NEXT: v_add_f32_e32 v2, v3, v2 -; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v1, v1 -; SI-GISEL-NEXT: v_exp_f32_e32 v2, v2 +; SI-GISEL-NEXT: v_rndne_f32_e32 v3, v1 +; SI-GISEL-NEXT: v_sub_f32_e32 v1, v1, v3 +; SI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 +; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, 
v3 +; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1 ; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 -; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v2, v1 +; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 ; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218 @@ -7843,20 +7843,20 @@ ; GFX900-SDAG-LABEL: v_exp_f32_contract_daz: ; GFX900-SDAG: ; %bb.0: ; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b ; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0 -; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1 -; GFX900-SDAG-NEXT: s_mov_b32 s5, 0x32a5705f -; GFX900-SDAG-NEXT: v_rndne_f32_e32 v1, v1 -; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s5, v2 -; GFX900-SDAG-NEXT: v_fma_f32 v3, v0, s4, -v1 -; GFX900-SDAG-NEXT: v_add_f32_e32 v2, v3, v2 -; GFX900-SDAG-NEXT: v_exp_f32_e32 v2, v2 -; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v1, v1 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b +; GFX900-SDAG-NEXT: v_rndne_f32_e32 v2, v1 +; GFX900-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2 +; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x32a5705f +; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1 +; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v3, v1 +; GFX900-SDAG-NEXT: v_exp_f32_e32 v1, v1 +; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0 ; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x42b17218 -; GFX900-SDAG-NEXT: v_ldexp_f32 v1, v2, v1 +; GFX900-SDAG-NEXT: v_ldexp_f32 v1, v1, v2 ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc ; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000 ; GFX900-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0 @@ -7870,14 +7870,14 @@ ; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0 ; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f -; GFX900-GISEL-NEXT: v_rndne_f32_e32 v1, v1 ; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2 -; 
GFX900-GISEL-NEXT: v_fma_f32 v3, v0, s4, -v1 -; GFX900-GISEL-NEXT: v_add_f32_e32 v2, v3, v2 -; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v1, v1 -; GFX900-GISEL-NEXT: v_exp_f32_e32 v2, v2 +; GFX900-GISEL-NEXT: v_rndne_f32_e32 v3, v1 +; GFX900-GISEL-NEXT: v_sub_f32_e32 v1, v1, v3 +; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 +; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3 +; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 -; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v2, v1 +; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 ; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218 @@ -7889,20 +7889,20 @@ ; SI-SDAG-LABEL: v_exp_f32_contract_daz: ; SI-SDAG: ; %bb.0: ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b ; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0 -; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1 -; SI-SDAG-NEXT: s_mov_b32 s5, 0x32a5705f -; SI-SDAG-NEXT: v_rndne_f32_e32 v1, v1 -; SI-SDAG-NEXT: v_fma_f32 v2, v0, s5, v2 -; SI-SDAG-NEXT: v_fma_f32 v3, v0, s4, -v1 -; SI-SDAG-NEXT: v_add_f32_e32 v2, v3, v2 -; SI-SDAG-NEXT: v_exp_f32_e32 v2, v2 -; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v1, v1 +; SI-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b +; SI-SDAG-NEXT: v_rndne_f32_e32 v2, v1 +; SI-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2 +; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1 +; SI-SDAG-NEXT: s_mov_b32 s4, 0x32a5705f +; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1 +; SI-SDAG-NEXT: v_add_f32_e32 v1, v3, v1 +; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1 +; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2 ; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0 ; SI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x42b17218 -; SI-SDAG-NEXT: v_ldexp_f32_e32 v1, v2, v1 +; SI-SDAG-NEXT: v_ldexp_f32_e32 v1, v1, v2 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc ; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000 ; SI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0 @@ 
-7916,14 +7916,14 @@ ; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0 ; SI-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1 ; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f -; SI-GISEL-NEXT: v_rndne_f32_e32 v1, v1 ; SI-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2 -; SI-GISEL-NEXT: v_fma_f32 v3, v0, s4, -v1 -; SI-GISEL-NEXT: v_add_f32_e32 v2, v3, v2 -; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v1, v1 -; SI-GISEL-NEXT: v_exp_f32_e32 v2, v2 +; SI-GISEL-NEXT: v_rndne_f32_e32 v3, v1 +; SI-GISEL-NEXT: v_sub_f32_e32 v1, v1, v3 +; SI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 +; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3 +; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1 ; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 -; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v2, v1 +; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 ; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218 @@ -7993,19 +7993,19 @@ ; GFX900-SDAG-LABEL: v_exp_f32_contract_nnan_ninf: ; GFX900-SDAG: ; %bb.0: ; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b ; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0 -; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1 -; GFX900-SDAG-NEXT: s_mov_b32 s5, 0x32a5705f -; GFX900-SDAG-NEXT: v_rndne_f32_e32 v1, v1 -; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s5, v2 -; GFX900-SDAG-NEXT: v_fma_f32 v3, v0, s4, -v1 -; GFX900-SDAG-NEXT: v_add_f32_e32 v2, v3, v2 -; GFX900-SDAG-NEXT: v_exp_f32_e32 v2, v2 -; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v1, v1 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b +; GFX900-SDAG-NEXT: v_rndne_f32_e32 v2, v1 +; GFX900-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2 +; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x32a5705f +; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1 +; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v3, v1 +; GFX900-SDAG-NEXT: v_exp_f32_e32 v1, v1 +; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0 ; GFX900-SDAG-NEXT: 
v_cmp_ngt_f32_e32 vcc, s4, v0 -; GFX900-SDAG-NEXT: v_ldexp_f32 v1, v2, v1 +; GFX900-SDAG-NEXT: v_ldexp_f32 v1, v1, v2 ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc ; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] ; @@ -8016,13 +8016,13 @@ ; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0 ; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f -; GFX900-GISEL-NEXT: v_rndne_f32_e32 v1, v1 ; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2 -; GFX900-GISEL-NEXT: v_fma_f32 v3, v0, s4, -v1 -; GFX900-GISEL-NEXT: v_add_f32_e32 v2, v3, v2 -; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v1, v1 -; GFX900-GISEL-NEXT: v_exp_f32_e32 v2, v2 -; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v2, v1 +; GFX900-GISEL-NEXT: v_rndne_f32_e32 v3, v1 +; GFX900-GISEL-NEXT: v_sub_f32_e32 v1, v1, v3 +; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 +; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3 +; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1 +; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 ; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 ; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc @@ -8031,19 +8031,19 @@ ; SI-SDAG-LABEL: v_exp_f32_contract_nnan_ninf: ; SI-SDAG: ; %bb.0: ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b ; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0 -; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1 -; SI-SDAG-NEXT: s_mov_b32 s5, 0x32a5705f -; SI-SDAG-NEXT: v_rndne_f32_e32 v1, v1 -; SI-SDAG-NEXT: v_fma_f32 v2, v0, s5, v2 -; SI-SDAG-NEXT: v_fma_f32 v3, v0, s4, -v1 -; SI-SDAG-NEXT: v_add_f32_e32 v2, v3, v2 -; SI-SDAG-NEXT: v_exp_f32_e32 v2, v2 -; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v1, v1 +; SI-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b +; SI-SDAG-NEXT: v_rndne_f32_e32 v2, v1 +; SI-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2 +; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1 +; SI-SDAG-NEXT: s_mov_b32 s4, 0x32a5705f +; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1 +; SI-SDAG-NEXT: v_add_f32_e32 v1, v3, 
v1 +; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1 +; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2 ; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0 ; SI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 -; SI-SDAG-NEXT: v_ldexp_f32_e32 v1, v2, v1 +; SI-SDAG-NEXT: v_ldexp_f32_e32 v1, v1, v2 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc ; SI-SDAG-NEXT: s_setpc_b64 s[30:31] ; @@ -8054,13 +8054,13 @@ ; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0 ; SI-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1 ; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f -; SI-GISEL-NEXT: v_rndne_f32_e32 v1, v1 ; SI-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2 -; SI-GISEL-NEXT: v_fma_f32 v3, v0, s4, -v1 -; SI-GISEL-NEXT: v_add_f32_e32 v2, v3, v2 -; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v1, v1 -; SI-GISEL-NEXT: v_exp_f32_e32 v2, v2 -; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v2, v1 +; SI-GISEL-NEXT: v_rndne_f32_e32 v3, v1 +; SI-GISEL-NEXT: v_sub_f32_e32 v1, v1, v3 +; SI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 +; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3 +; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1 +; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 ; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 ; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc