Index: llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -2469,7 +2469,19 @@
   case ISD::FP_EXTEND:
     return Src.getOperand(0).getValueType() == MVT::f16;
   case ISD::FP16_TO_FP:
+  case ISD::FFREXP:
     return true;
+  case ISD::INTRINSIC_WO_CHAIN: {
+    // Operand 0 of the node is read as the intrinsic ID.
+    unsigned IntrinsicID =
+        cast<ConstantSDNode>(Src.getOperand(0))->getZExtValue();
+    switch (IntrinsicID) {
+    case Intrinsic::amdgcn_frexp_mant:
+      return true;
+    default:
+      return false;
+    }
+  }
   default:
     return false;
   }
Index: llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -3020,9 +3020,34 @@
 /// Return true if it's known that \p Src can never be an f32 denormal value.
 static bool valueIsKnownNeverF32Denorm(const MachineRegisterInfo &MRI,
                                        Register Src) {
-  Register ExtSrc;
-  if (mi_match(Src, MRI, m_GFPExt(m_Reg(ExtSrc))))
-    return MRI.getType(ExtSrc) == LLT::scalar(16);
+  // Decide based on the opcode of the instruction that defines Src.
+  const MachineInstr *DefMI = MRI.getVRegDef(Src);
+  switch (DefMI->getOpcode()) {
+  case TargetOpcode::G_INTRINSIC: {
+    switch (cast<GIntrinsic>(DefMI)->getIntrinsicID()) {
+    case Intrinsic::amdgcn_frexp_mant:
+      return true;
+    default:
+      break;
+    }
+
+    break;
+  }
+  case TargetOpcode::G_FFREXP: {
+    // Only the value defined by operand 0 is accepted; any other result of
+    // this instruction takes the conservative "return false" path below.
+    if (DefMI->getOperand(0).getReg() == Src)
+      return true;
+    break;
+  }
+  case TargetOpcode::G_FPEXT: {
+    // Accepted only when the extended source is a 16-bit scalar.
+    return MRI.getType(DefMI->getOperand(1).getReg()) == LLT::scalar(16);
+  }
+  default:
+    return false;
+  }
+
   return false;
 }
 
Index: llvm/test/CodeGen/AMDGPU/fsqrt.f32.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/fsqrt.f32.ll +++ llvm/test/CodeGen/AMDGPU/fsqrt.f32.ll @@ -3562,111 +3562,57 @@ } define float @v_sqrt_f32_frexp_src(float %x) { -; SDAG-IEEE-LABEL: v_sqrt_f32_frexp_src: -; SDAG-IEEE: ; %bb.0: -; 
SDAG-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-IEEE-NEXT: s_mov_b32 s4, 0x7f800000 -; SDAG-IEEE-NEXT: v_frexp_mant_f32_e32 v1, v0 -; SDAG-IEEE-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4 -; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc -; SDAG-IEEE-NEXT: s_mov_b32 s4, 0xf800000 -; SDAG-IEEE-NEXT: v_mul_f32_e32 v1, 0x4f800000, v0 -; SDAG-IEEE-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 -; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc -; SDAG-IEEE-NEXT: v_sqrt_f32_e32 v1, v0 -; SDAG-IEEE-NEXT: v_add_i32_e64 v2, s[4:5], -1, v1 -; SDAG-IEEE-NEXT: v_fma_f32 v3, -v2, v1, v0 -; SDAG-IEEE-NEXT: v_cmp_ge_f32_e64 s[4:5], 0, v3 -; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v2, v1, v2, s[4:5] -; SDAG-IEEE-NEXT: v_add_i32_e64 v3, s[4:5], 1, v1 -; SDAG-IEEE-NEXT: v_fma_f32 v1, -v3, v1, v0 -; SDAG-IEEE-NEXT: v_cmp_lt_f32_e64 s[4:5], 0, v1 -; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v1, v2, v3, s[4:5] -; SDAG-IEEE-NEXT: v_mul_f32_e32 v2, 0x37800000, v1 -; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc -; SDAG-IEEE-NEXT: v_mov_b32_e32 v2, 0x260 -; SDAG-IEEE-NEXT: v_cmp_class_f32_e32 vcc, v0, v2 -; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc -; SDAG-IEEE-NEXT: s_setpc_b64 s[30:31] -; -; GISEL-IEEE-LABEL: v_sqrt_f32_frexp_src: -; GISEL-IEEE: ; %bb.0: -; GISEL-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-IEEE-NEXT: v_mov_b32_e32 v2, 0x7f800000 -; GISEL-IEEE-NEXT: v_frexp_mant_f32_e32 v1, v0 -; GISEL-IEEE-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v2 -; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc -; GISEL-IEEE-NEXT: v_mov_b32_e32 v1, 0xf800000 -; GISEL-IEEE-NEXT: v_mul_f32_e32 v2, 0x4f800000, v0 -; GISEL-IEEE-NEXT: v_cmp_gt_f32_e32 vcc, v1, v0 -; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc -; GISEL-IEEE-NEXT: v_sqrt_f32_e32 v1, v0 -; GISEL-IEEE-NEXT: v_add_i32_e64 v2, s[4:5], -1, v1 -; GISEL-IEEE-NEXT: v_fma_f32 v3, -v2, v1, v0 -; GISEL-IEEE-NEXT: v_add_i32_e64 v4, s[4:5], 1, v1 -; GISEL-IEEE-NEXT: v_fma_f32 v5, -v4, v1, v0 -; GISEL-IEEE-NEXT: v_cmp_ge_f32_e64 
s[4:5], 0, v3 -; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v1, v1, v2, s[4:5] -; GISEL-IEEE-NEXT: v_cmp_lt_f32_e64 s[4:5], 0, v5 -; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v1, v1, v4, s[4:5] -; GISEL-IEEE-NEXT: v_mul_f32_e32 v2, 0x37800000, v1 -; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc -; GISEL-IEEE-NEXT: v_mov_b32_e32 v2, 0x260 -; GISEL-IEEE-NEXT: v_cmp_class_f32_e32 vcc, v0, v2 -; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc -; GISEL-IEEE-NEXT: s_setpc_b64 s[30:31] -; -; SDAG-DAZ-LABEL: v_sqrt_f32_frexp_src: -; SDAG-DAZ: ; %bb.0: -; SDAG-DAZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-DAZ-NEXT: s_mov_b32 s4, 0x7f800000 -; SDAG-DAZ-NEXT: v_frexp_mant_f32_e32 v1, v0 -; SDAG-DAZ-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4 -; SDAG-DAZ-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc -; SDAG-DAZ-NEXT: s_mov_b32 s4, 0xf800000 -; SDAG-DAZ-NEXT: v_mul_f32_e32 v1, 0x4f800000, v0 -; SDAG-DAZ-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 -; SDAG-DAZ-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc -; SDAG-DAZ-NEXT: v_rsq_f32_e32 v1, v0 -; SDAG-DAZ-NEXT: v_mul_f32_e32 v2, v0, v1 -; SDAG-DAZ-NEXT: v_mul_f32_e32 v1, 0.5, v1 -; SDAG-DAZ-NEXT: v_fma_f32 v3, -v1, v2, 0.5 -; SDAG-DAZ-NEXT: v_fma_f32 v2, v2, v3, v2 -; SDAG-DAZ-NEXT: v_fma_f32 v4, -v2, v2, v0 -; SDAG-DAZ-NEXT: v_fma_f32 v1, v1, v3, v1 -; SDAG-DAZ-NEXT: v_fma_f32 v1, v4, v1, v2 -; SDAG-DAZ-NEXT: v_mul_f32_e32 v2, 0x37800000, v1 -; SDAG-DAZ-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc -; SDAG-DAZ-NEXT: v_mov_b32_e32 v2, 0x260 -; SDAG-DAZ-NEXT: v_cmp_class_f32_e32 vcc, v0, v2 -; SDAG-DAZ-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc -; SDAG-DAZ-NEXT: s_setpc_b64 s[30:31] +; SDAG-LABEL: v_sqrt_f32_frexp_src: +; SDAG: ; %bb.0: +; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-NEXT: s_mov_b32 s4, 0x7f800000 +; SDAG-NEXT: v_frexp_mant_f32_e32 v1, v0 +; SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4 +; SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; SDAG-NEXT: s_mov_b32 s4, 0xf800000 +; SDAG-NEXT: v_mul_f32_e32 v1, 0x4f800000, v0 +; SDAG-NEXT: 
v_cmp_gt_f32_e32 vcc, s4, v0 +; SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; SDAG-NEXT: v_rsq_f32_e32 v1, v0 +; SDAG-NEXT: v_mul_f32_e32 v2, v0, v1 +; SDAG-NEXT: v_mul_f32_e32 v1, 0.5, v1 +; SDAG-NEXT: v_fma_f32 v3, -v1, v2, 0.5 +; SDAG-NEXT: v_fma_f32 v2, v2, v3, v2 +; SDAG-NEXT: v_fma_f32 v4, -v2, v2, v0 +; SDAG-NEXT: v_fma_f32 v1, v1, v3, v1 +; SDAG-NEXT: v_fma_f32 v1, v4, v1, v2 +; SDAG-NEXT: v_mul_f32_e32 v2, 0x37800000, v1 +; SDAG-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc +; SDAG-NEXT: v_mov_b32_e32 v2, 0x260 +; SDAG-NEXT: v_cmp_class_f32_e32 vcc, v0, v2 +; SDAG-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc +; SDAG-NEXT: s_setpc_b64 s[30:31] ; -; GISEL-DAZ-LABEL: v_sqrt_f32_frexp_src: -; GISEL-DAZ: ; %bb.0: -; GISEL-DAZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-DAZ-NEXT: v_mov_b32_e32 v2, 0x7f800000 -; GISEL-DAZ-NEXT: v_frexp_mant_f32_e32 v1, v0 -; GISEL-DAZ-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v2 -; GISEL-DAZ-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc -; GISEL-DAZ-NEXT: v_mov_b32_e32 v1, 0xf800000 -; GISEL-DAZ-NEXT: v_mul_f32_e32 v2, 0x4f800000, v0 -; GISEL-DAZ-NEXT: v_cmp_gt_f32_e32 vcc, v1, v0 -; GISEL-DAZ-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc -; GISEL-DAZ-NEXT: v_rsq_f32_e32 v1, v0 -; GISEL-DAZ-NEXT: v_mul_f32_e32 v2, v0, v1 -; GISEL-DAZ-NEXT: v_mul_f32_e32 v1, 0.5, v1 -; GISEL-DAZ-NEXT: v_fma_f32 v3, -v1, v2, 0.5 -; GISEL-DAZ-NEXT: v_fma_f32 v2, v2, v3, v2 -; GISEL-DAZ-NEXT: v_fma_f32 v1, v1, v3, v1 -; GISEL-DAZ-NEXT: v_fma_f32 v3, -v2, v2, v0 -; GISEL-DAZ-NEXT: v_fma_f32 v1, v3, v1, v2 -; GISEL-DAZ-NEXT: v_mul_f32_e32 v2, 0x37800000, v1 -; GISEL-DAZ-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc -; GISEL-DAZ-NEXT: v_mov_b32_e32 v2, 0x260 -; GISEL-DAZ-NEXT: v_cmp_class_f32_e32 vcc, v0, v2 -; GISEL-DAZ-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc -; GISEL-DAZ-NEXT: s_setpc_b64 s[30:31] +; GISEL-LABEL: v_sqrt_f32_frexp_src: +; GISEL: ; %bb.0: +; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000 +; GISEL-NEXT: 
v_frexp_mant_f32_e32 v1, v0 +; GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v2 +; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; GISEL-NEXT: v_mov_b32_e32 v1, 0xf800000 +; GISEL-NEXT: v_mul_f32_e32 v2, 0x4f800000, v0 +; GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v1, v0 +; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc +; GISEL-NEXT: v_rsq_f32_e32 v1, v0 +; GISEL-NEXT: v_mul_f32_e32 v2, v0, v1 +; GISEL-NEXT: v_mul_f32_e32 v1, 0.5, v1 +; GISEL-NEXT: v_fma_f32 v3, -v1, v2, 0.5 +; GISEL-NEXT: v_fma_f32 v2, v2, v3, v2 +; GISEL-NEXT: v_fma_f32 v1, v1, v3, v1 +; GISEL-NEXT: v_fma_f32 v3, -v2, v2, v0 +; GISEL-NEXT: v_fma_f32 v1, v3, v1, v2 +; GISEL-NEXT: v_mul_f32_e32 v2, 0x37800000, v1 +; GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc +; GISEL-NEXT: v_mov_b32_e32 v2, 0x260 +; GISEL-NEXT: v_cmp_class_f32_e32 vcc, v0, v2 +; GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc +; GISEL-NEXT: s_setpc_b64 s[30:31] %frexp = call { float, i32 } @llvm.frexp.f32.i32(float %x) %frexp.mant = extractvalue { float, i32 } %frexp, 0 %result = call float @llvm.sqrt.f32(float %frexp.mant)