Index: llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -2529,7 +2529,7 @@ std::pair AMDGPUTargetLowering::getScaledLogInput(SelectionDAG &DAG, const SDLoc SL, SDValue Src, SDNodeFlags Flags) const { - if (allowApproxFunc(DAG, Flags) || !needsDenormHandlingF32(DAG, Src, Flags)) + if (!needsDenormHandlingF32(DAG, Src, Flags)) return {}; MVT VT = MVT::f32; Index: llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -3043,8 +3043,7 @@ std::pair AMDGPULegalizerInfo::getScaledLogInput(MachineIRBuilder &B, Register Src, unsigned Flags) const { - if (allowApproxFunc(B.getMF(), Flags) || - !needsDenormHandlingF32(B.getMF(), Src, Flags)) + if (!needsDenormHandlingF32(B.getMF(), Src, Flags)) return {}; const LLT F32 = LLT::scalar(32); Index: llvm/test/CodeGen/AMDGPU/llvm.log2.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/llvm.log2.ll +++ llvm/test/CodeGen/AMDGPU/llvm.log2.ll @@ -1511,17 +1511,59 @@ } define float @v_log2_f32_fast(float %in) { -; GFX689-LABEL: v_log2_f32_fast: -; GFX689: ; %bb.0: -; GFX689-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX689-NEXT: v_log_f32_e32 v0, v0 -; GFX689-NEXT: s_setpc_b64 s[30:31] +; GFX689-SDAG-LABEL: v_log2_f32_fast: +; GFX689-SDAG: ; %bb.0: +; GFX689-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX689-SDAG-NEXT: s_mov_b32 s4, 0x800000 +; GFX689-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 +; GFX689-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000 +; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc +; GFX689-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2 +; GFX689-SDAG-NEXT: v_log_f32_e32 v0, v0 +; GFX689-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000 +; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX689-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 +; GFX689-SDAG-NEXT: s_setpc_b64 s[30:31] ; -; GFX1100-LABEL: v_log2_f32_fast: -; GFX1100: ; %bb.0: -; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: v_log_f32_e32 v0, v0 -; GFX1100-NEXT: s_setpc_b64 s[30:31] +; GFX689-GISEL-LABEL: v_log2_f32_fast: +; GFX689-GISEL: ; %bb.0: +; GFX689-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 +; GFX689-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000 +; GFX689-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 +; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc +; GFX689-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX689-GISEL-NEXT: v_log_f32_e32 v0, v0 +; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000 +; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX689-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 +; GFX689-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-SDAG-LABEL: v_log2_f32_fast: +; GFX1100-SDAG: ; %bb.0: +; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0 +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, vcc_lo +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2 +; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0 +; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 +; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-GISEL-LABEL: v_log2_f32_fast: +; GFX1100-GISEL: ; %bb.0: +; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0 +; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo +; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0 +; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 +; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_log2_f32_fast: ; R600: ; %bb.0: @@ -1537,17 +1579,59 @@ } define float @v_log2_f32_unsafe_math_attr(float %in) "unsafe-fp-math"="true" { -; GFX689-LABEL: v_log2_f32_unsafe_math_attr: -; GFX689: ; %bb.0: -; GFX689-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX689-NEXT: v_log_f32_e32 v0, v0 -; GFX689-NEXT: s_setpc_b64 s[30:31] +; GFX689-SDAG-LABEL: v_log2_f32_unsafe_math_attr: +; GFX689-SDAG: ; %bb.0: +; GFX689-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX689-SDAG-NEXT: s_mov_b32 s4, 0x800000 +; GFX689-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 +; GFX689-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000 +; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc +; GFX689-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2 +; GFX689-SDAG-NEXT: v_log_f32_e32 v0, v0 +; GFX689-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000 +; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX689-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 +; GFX689-SDAG-NEXT: s_setpc_b64 s[30:31] ; -; GFX1100-LABEL: v_log2_f32_unsafe_math_attr: -; GFX1100: ; %bb.0: -; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: v_log_f32_e32 v0, v0 -; GFX1100-NEXT: s_setpc_b64 s[30:31] +; GFX689-GISEL-LABEL: v_log2_f32_unsafe_math_attr: +; GFX689-GISEL: ; %bb.0: +; GFX689-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 +; GFX689-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000 +; GFX689-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 +; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc +; GFX689-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX689-GISEL-NEXT: v_log_f32_e32 v0, v0 +; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000 +; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX689-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 +; GFX689-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-SDAG-LABEL: v_log2_f32_unsafe_math_attr: +; GFX1100-SDAG: ; %bb.0: +; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0 +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, vcc_lo +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2 +; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0 +; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 +; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-GISEL-LABEL: v_log2_f32_unsafe_math_attr: +; GFX1100-GISEL: ; %bb.0: +; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0 +; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo +; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0 +; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 +; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_log2_f32_unsafe_math_attr: ; R600: ; %bb.0: @@ -1563,17 +1647,59 @@ } define float @v_log2_f32_approx_fn_attr(float %in) "approx-func-fp-math"="true" { -; GFX689-LABEL: v_log2_f32_approx_fn_attr: -; GFX689: ; %bb.0: -; GFX689-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX689-NEXT: v_log_f32_e32 v0, v0 -; GFX689-NEXT: s_setpc_b64 s[30:31] +; GFX689-SDAG-LABEL: v_log2_f32_approx_fn_attr: +; GFX689-SDAG: ; %bb.0: +; GFX689-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX689-SDAG-NEXT: s_mov_b32 s4, 0x800000 +; GFX689-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 +; GFX689-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000 +; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc +; GFX689-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2 +; GFX689-SDAG-NEXT: v_log_f32_e32 v0, v0 +; GFX689-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000 +; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX689-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 +; GFX689-SDAG-NEXT: s_setpc_b64 s[30:31] ; -; GFX1100-LABEL: v_log2_f32_approx_fn_attr: -; GFX1100: ; %bb.0: -; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: v_log_f32_e32 v0, v0 -; GFX1100-NEXT: s_setpc_b64 s[30:31] +; GFX689-GISEL-LABEL: v_log2_f32_approx_fn_attr: +; GFX689-GISEL: ; %bb.0: +; GFX689-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 +; GFX689-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000 +; GFX689-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 +; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc +; GFX689-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX689-GISEL-NEXT: v_log_f32_e32 v0, v0 +; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000 +; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX689-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 +; GFX689-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-SDAG-LABEL: v_log2_f32_approx_fn_attr: +; GFX1100-SDAG: ; %bb.0: +; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0 +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, vcc_lo +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2 +; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0 +; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 +; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-GISEL-LABEL: v_log2_f32_approx_fn_attr: +; GFX1100-GISEL: ; %bb.0: +; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0 +; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo +; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0 +; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 +; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_log2_f32_approx_fn_attr: ; R600: ; %bb.0: @@ -1657,17 +1783,59 @@ } define float @v_log2_f32_afn(float %in) { -; GFX689-LABEL: v_log2_f32_afn: -; GFX689: ; %bb.0: -; GFX689-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX689-NEXT: v_log_f32_e32 v0, v0 -; GFX689-NEXT: s_setpc_b64 s[30:31] +; GFX689-SDAG-LABEL: v_log2_f32_afn: +; GFX689-SDAG: ; %bb.0: +; GFX689-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX689-SDAG-NEXT: s_mov_b32 s4, 0x800000 +; GFX689-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 +; GFX689-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000 +; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc +; GFX689-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2 +; GFX689-SDAG-NEXT: v_log_f32_e32 v0, v0 +; GFX689-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000 +; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX689-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 +; GFX689-SDAG-NEXT: s_setpc_b64 s[30:31] ; -; GFX1100-LABEL: v_log2_f32_afn: -; GFX1100: ; %bb.0: -; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: v_log_f32_e32 v0, v0 -; GFX1100-NEXT: s_setpc_b64 s[30:31] +; GFX689-GISEL-LABEL: v_log2_f32_afn: +; GFX689-GISEL: ; %bb.0: +; GFX689-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 +; GFX689-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000 +; GFX689-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 +; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc +; GFX689-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX689-GISEL-NEXT: v_log_f32_e32 v0, v0 +; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000 +; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX689-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 +; GFX689-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-SDAG-LABEL: v_log2_f32_afn: +; GFX1100-SDAG: ; %bb.0: +; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0 +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, vcc_lo +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2 +; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0 +; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 +; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-GISEL-LABEL: v_log2_f32_afn: +; GFX1100-GISEL: ; %bb.0: +; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0 +; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo +; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0 +; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 +; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_log2_f32_afn: ; R600: ; %bb.0: @@ -1709,17 +1877,59 @@ } define float @v_log2_f32_afn_dynamic(float %in) #1 { -; GFX689-LABEL: v_log2_f32_afn_dynamic: -; GFX689: ; %bb.0: -; GFX689-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX689-NEXT: v_log_f32_e32 v0, v0 -; GFX689-NEXT: s_setpc_b64 s[30:31] +; GFX689-SDAG-LABEL: v_log2_f32_afn_dynamic: +; GFX689-SDAG: ; %bb.0: +; GFX689-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX689-SDAG-NEXT: s_mov_b32 s4, 0x800000 +; GFX689-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 +; GFX689-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000 +; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc +; GFX689-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2 +; GFX689-SDAG-NEXT: v_log_f32_e32 v0, v0 +; GFX689-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000 +; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX689-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 +; GFX689-SDAG-NEXT: s_setpc_b64 s[30:31] ; -; GFX1100-LABEL: v_log2_f32_afn_dynamic: -; GFX1100: ; %bb.0: -; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: v_log_f32_e32 v0, v0 -; GFX1100-NEXT: s_setpc_b64 s[30:31] +; GFX689-GISEL-LABEL: v_log2_f32_afn_dynamic: +; GFX689-GISEL: ; %bb.0: +; GFX689-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 +; GFX689-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000 +; GFX689-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 +; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc +; GFX689-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX689-GISEL-NEXT: v_log_f32_e32 v0, v0 +; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000 +; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX689-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 +; GFX689-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-SDAG-LABEL: v_log2_f32_afn_dynamic: +; GFX1100-SDAG: ; %bb.0: +; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0 +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, vcc_lo +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2 +; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0 +; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 +; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-GISEL-LABEL: v_log2_f32_afn_dynamic: +; GFX1100-GISEL: ; %bb.0: +; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0 +; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo +; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0 +; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 +; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_log2_f32_afn_dynamic: ; R600: ; %bb.0: @@ -1735,17 +1945,61 @@ } define float @v_fabs_log2_f32_afn(float %in) { -; GFX689-LABEL: v_fabs_log2_f32_afn: -; GFX689: ; %bb.0: -; GFX689-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX689-NEXT: v_log_f32_e64 v0, |v0| -; GFX689-NEXT: s_setpc_b64 s[30:31] +; GFX689-SDAG-LABEL: v_fabs_log2_f32_afn: +; GFX689-SDAG: ; %bb.0: +; GFX689-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX689-SDAG-NEXT: s_mov_b32 s4, 0x800000 +; GFX689-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4 +; GFX689-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000 +; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc +; GFX689-SDAG-NEXT: v_mul_f32_e64 v0, |v0|, v2 +; GFX689-SDAG-NEXT: v_log_f32_e32 v0, v0 +; GFX689-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000 +; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX689-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 +; GFX689-SDAG-NEXT: s_setpc_b64 s[30:31] ; -; GFX1100-LABEL: v_fabs_log2_f32_afn: -; GFX1100: ; %bb.0: -; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: v_log_f32_e64 v0, |v0| -; GFX1100-NEXT: s_setpc_b64 s[30:31] +; GFX689-GISEL-LABEL: v_fabs_log2_f32_afn: +; GFX689-GISEL: ; %bb.0: +; GFX689-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 +; GFX689-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000 +; GFX689-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v1 +; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc +; GFX689-GISEL-NEXT: v_mul_f32_e64 v0, |v0|, v1 +; GFX689-GISEL-NEXT: v_log_f32_e32 v0, v0 +; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000 +; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX689-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 +; GFX689-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-SDAG-LABEL: v_fabs_log2_f32_afn: +; GFX1100-SDAG: ; %bb.0: +; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 s0, 0x800000, |v0| +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, s0 +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, s0 +; GFX1100-SDAG-NEXT: v_mul_f32_e64 v0, |v0|, v2 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0 +; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 +; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-GISEL-LABEL: v_fabs_log2_f32_afn: +; GFX1100-GISEL: ; %bb.0: +; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s0, 0x800000, |v0| +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, s0 +; GFX1100-GISEL-NEXT: v_mul_f32_e64 v0, |v0|, v1 +; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, s0 +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) +; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0 +; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 +; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_fabs_log2_f32_afn: ; R600: ; %bb.0: