Index: llvm/lib/Target/AMDGPU/SIISelLowering.cpp =================================================================== --- llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -10856,10 +10856,15 @@ return true; if (auto *CFP = dyn_cast<ConstantFPSDNode>(Op)) { - auto F = CFP->getValueAPF(); + const auto &F = CFP->getValueAPF(); if (F.isNaN() && F.isSignaling()) return false; - return !F.isDenormal() || denormalsEnabledForType(DAG, Op.getValueType()); + if (!F.isDenormal()) + return true; + + DenormalMode Mode = + DAG.getMachineFunction().getDenormalMode(F.getSemantics()); + return Mode == DenormalMode::getIEEE(); } // If source is a result of another standard FP operation it is already in @@ -10928,6 +10933,7 @@ // snans will be quieted, so we only need to worry about denormals. if (Subtarget->supportsMinMaxDenormModes() || + // FIXME: denormalsEnabledForType is broken for dynamic denormalsEnabledForType(DAG, Op.getValueType())) return true; @@ -11007,6 +11013,7 @@ [[fallthrough]]; } default: + // FIXME: denormalsEnabledForType is broken for dynamic return denormalsEnabledForType(DAG, Op.getValueType()) && DAG.isKnownNeverSNaN(Op); } @@ -11028,8 +11035,11 @@ if (mi_match(Reg, MRI, MIPatternMatch::m_GFCstOrSplat(FCR))) { if (FCR->Value.isSignaling()) return false; - return !FCR->Value.isDenormal() || - denormalsEnabledForType(MRI.getType(FCR->VReg), MF); + if (!FCR->Value.isDenormal()) + return true; + + DenormalMode Mode = MF.getDenormalMode(FCR->Value.getSemantics()); + return Mode == DenormalMode::getIEEE(); } if (MaxDepth == 0) @@ -11072,6 +11082,7 @@ case AMDGPU::G_FMINNUM_IEEE: case AMDGPU::G_FMAXNUM_IEEE: { if (Subtarget->supportsMinMaxDenormModes() || + // FIXME: denormalsEnabledForType is broken for dynamic denormalsEnabledForType(MRI.getType(Reg), MF)) return true; @@ -11128,9 +11139,16 @@ SDValue SITargetLowering::getCanonicalConstantFP( SelectionDAG &DAG, const SDLoc &SL, EVT VT, const APFloat &C) const { // Flush denormals to 0 if not 
enabled. - if (C.isDenormal() && !denormalsEnabledForType(DAG, VT)) { - return DAG.getConstantFP(APFloat::getZero(C.getSemantics(), - C.isNegative()), SL, VT); + if (C.isDenormal()) { + DenormalMode Mode = + DAG.getMachineFunction().getDenormalMode(C.getSemantics()); + if (Mode == DenormalMode::getPreserveSign()) { + return DAG.getConstantFP( + APFloat::getZero(C.getSemantics(), C.isNegative()), SL, VT); + } + + if (Mode != DenormalMode::getIEEE()) + return SDValue(); } if (C.isNaN()) { Index: llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fcanonicalize.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fcanonicalize.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fcanonicalize.mir @@ -36,6 +36,7 @@ $vgpr0 = COPY %1(s32) ... +# FIXME: Mode fields are redundant and not considered. --- name: test_denormal_fconstant tracksRegLiveness: true @@ -49,8 +50,7 @@ ; CHECK-LABEL: name: test_denormal_fconstant ; CHECK: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 1.618950e-319 - ; CHECK-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s64) = G_FCANONICALIZE [[C]] - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[FCANONICALIZE]](s64) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[C]](s64) %0:_(s64) = G_FCONSTANT double 0x0000000000008000 %1:_(s64) = G_FCANONICALIZE %0 $vgpr0_vgpr1 = COPY %1(s64) Index: llvm/test/CodeGen/AMDGPU/fcanonicalize.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/fcanonicalize.ll +++ llvm/test/CodeGen/AMDGPU/fcanonicalize.ll @@ -465,6 +465,114 @@ ret void } +define amdgpu_kernel void @test_no_denormals_fold_canonicalize_denormal0_f32_dynamic(ptr addrspace(1) %out) #5 { +; GFX678-LABEL: test_no_denormals_fold_canonicalize_denormal0_f32_dynamic: +; GFX678: ; %bb.0: +; GFX678-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX678-NEXT: s_mov_b32 s2, 0x7fffff +; GFX678-NEXT: v_mul_f32_e64 v2, 1.0, s2 +; GFX678-NEXT: s_waitcnt lgkmcnt(0) +; GFX678-NEXT: v_mov_b32_e32 v0, s0 +; 
GFX678-NEXT: v_mov_b32_e32 v1, s1 +; GFX678-NEXT: flat_store_dword v[0:1], v2 +; GFX678-NEXT: s_endpgm +; +; GFX9-LABEL: test_no_denormals_fold_canonicalize_denormal0_f32_dynamic: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX9-NEXT: s_mov_b32 s2, 0x7fffff +; GFX9-NEXT: v_mov_b32_e32 v0, 0 +; GFX9-NEXT: v_max_f32_e64 v1, s2, s2 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: global_store_dword v0, v1, s[0:1] +; GFX9-NEXT: s_endpgm +; +; GFX11-LABEL: test_no_denormals_fold_canonicalize_denormal0_f32_dynamic: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 +; GFX11-NEXT: v_mov_b32_e32 v0, 0 +; GFX11-NEXT: v_max_f32_e64 v1, 0x7fffff, 0x7fffff +; GFX11-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-NEXT: global_store_b32 v0, v1, s[0:1] +; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) +; GFX11-NEXT: s_endpgm + %canonicalized = call float @llvm.canonicalize.f32(float bitcast (i32 8388607 to float)) + store float %canonicalized, ptr addrspace(1) %out + ret void +} + +define amdgpu_kernel void @test_no_denormals_fold_canonicalize_denormal0_f32_dynamic_out(ptr addrspace(1) %out) #6 { +; GFX678-LABEL: test_no_denormals_fold_canonicalize_denormal0_f32_dynamic_out: +; GFX678: ; %bb.0: +; GFX678-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX678-NEXT: s_mov_b32 s2, 0x7fffff +; GFX678-NEXT: v_mul_f32_e64 v2, 1.0, s2 +; GFX678-NEXT: s_waitcnt lgkmcnt(0) +; GFX678-NEXT: v_mov_b32_e32 v0, s0 +; GFX678-NEXT: v_mov_b32_e32 v1, s1 +; GFX678-NEXT: flat_store_dword v[0:1], v2 +; GFX678-NEXT: s_endpgm +; +; GFX9-LABEL: test_no_denormals_fold_canonicalize_denormal0_f32_dynamic_out: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX9-NEXT: s_mov_b32 s2, 0x7fffff +; GFX9-NEXT: v_mov_b32_e32 v0, 0 +; GFX9-NEXT: v_max_f32_e64 v1, s2, s2 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: global_store_dword v0, v1, s[0:1] +; GFX9-NEXT: s_endpgm +; +; GFX11-LABEL: test_no_denormals_fold_canonicalize_denormal0_f32_dynamic_out: +; GFX11: ; 
%bb.0: +; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 +; GFX11-NEXT: v_mov_b32_e32 v0, 0 +; GFX11-NEXT: v_max_f32_e64 v1, 0x7fffff, 0x7fffff +; GFX11-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-NEXT: global_store_b32 v0, v1, s[0:1] +; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) +; GFX11-NEXT: s_endpgm + %canonicalized = call float @llvm.canonicalize.f32(float bitcast (i32 8388607 to float)) + store float %canonicalized, ptr addrspace(1) %out + ret void +} + +define amdgpu_kernel void @test_no_denormals_fold_canonicalize_denormal0_f32_dynamic_in(ptr addrspace(1) %out) #7 { +; GFX678-LABEL: test_no_denormals_fold_canonicalize_denormal0_f32_dynamic_in: +; GFX678: ; %bb.0: +; GFX678-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX678-NEXT: s_mov_b32 s2, 0x7fffff +; GFX678-NEXT: v_mul_f32_e64 v2, 1.0, s2 +; GFX678-NEXT: s_waitcnt lgkmcnt(0) +; GFX678-NEXT: v_mov_b32_e32 v0, s0 +; GFX678-NEXT: v_mov_b32_e32 v1, s1 +; GFX678-NEXT: flat_store_dword v[0:1], v2 +; GFX678-NEXT: s_endpgm +; +; GFX9-LABEL: test_no_denormals_fold_canonicalize_denormal0_f32_dynamic_in: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX9-NEXT: s_mov_b32 s2, 0x7fffff +; GFX9-NEXT: v_mov_b32_e32 v0, 0 +; GFX9-NEXT: v_max_f32_e64 v1, s2, s2 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: global_store_dword v0, v1, s[0:1] +; GFX9-NEXT: s_endpgm +; +; GFX11-LABEL: test_no_denormals_fold_canonicalize_denormal0_f32_dynamic_in: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 +; GFX11-NEXT: v_mov_b32_e32 v0, 0 +; GFX11-NEXT: v_max_f32_e64 v1, 0x7fffff, 0x7fffff +; GFX11-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-NEXT: global_store_b32 v0, v1, s[0:1] +; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) +; GFX11-NEXT: s_endpgm + %canonicalized = call float @llvm.canonicalize.f32(float bitcast (i32 8388607 to float)) + store float %canonicalized, ptr addrspace(1) %out + ret void +} + define amdgpu_kernel void @test_denormals_fold_canonicalize_denormal0_f32(ptr addrspace(1) %out) #3 
{ ; GFX678-LABEL: test_denormals_fold_canonicalize_denormal0_f32: ; GFX678: ; %bb.0: @@ -2400,3 +2508,6 @@ attributes #2 = { nounwind "denormal-fp-math"="preserve-sign,preserve-sign" } attributes #3 = { nounwind "denormal-fp-math"="ieee,ieee" } attributes #4 = { nounwind "denormal-fp-math"="preserve-sign,preserve-sign" } +attributes #5 = { nounwind "denormal-fp-math-f32"="dynamic,dynamic" } +attributes #6 = { nounwind "denormal-fp-math-f32"="dynamic,ieee" } +attributes #7 = { nounwind "denormal-fp-math-f32"="ieee,dynamic" }