Index: llvm/lib/CodeGen/GlobalISel/Utils.cpp =================================================================== --- llvm/lib/CodeGen/GlobalISel/Utils.cpp +++ llvm/lib/CodeGen/GlobalISel/Utils.cpp @@ -468,6 +468,19 @@ if (DefMI->getFlag(MachineInstr::FmNoNans) || TM.Options.NoNaNsFPMath) return true; + // If the value is a constant, we can obviously see if it is a NaN or not. + if (const ConstantFP *FPVal = getConstantFPVRegVal(Val, MRI)) { + return !FPVal->getValueAPF().isNaN() || + (SNaN && !FPVal->getValueAPF().isSignaling()); + } + + if (DefMI->getOpcode() == TargetOpcode::G_BUILD_VECTOR) { + for (const auto &Op : DefMI->uses()) + if (!isKnownNeverNaN(Op.getReg(), MRI, SNaN)) + return false; + return true; + } + if (SNaN) { // FP operations quiet. For now, just handle the ones inserted during // legalization. Index: llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fmaxnum.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fmaxnum.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fmaxnum.mir @@ -793,22 +793,19 @@ ; SI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; SI: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 ; SI: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] - ; SI: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[C]] - ; SI: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] + ; SI: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[C]] ; SI: $vgpr0 = COPY [[FMAXNUM_IEEE]](s32) ; VI-LABEL: name: test_fmaxnum_with_constant_argument_s32_ieee_mode_on ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; VI: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 ; VI: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] - ; VI: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[C]] - ; VI: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] + ; VI: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[C]] ; VI: $vgpr0 = COPY [[FMAXNUM_IEEE]](s32) ; GFX9-LABEL: name: test_fmaxnum_with_constant_argument_s32_ieee_mode_on ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 ; GFX9: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] - ; GFX9: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[C]] - ; GFX9: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] + ; GFX9: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[C]] ; GFX9: $vgpr0 = COPY [[FMAXNUM_IEEE]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = G_FCONSTANT float 0.000000e+00 @@ -873,8 +870,7 @@ ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[C]](s16) ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT]](s32) ; GFX9: [[FCANONICALIZE:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[COPY]] - ; GFX9: [[FCANONICALIZE1:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[BUILD_VECTOR_TRUNC]] - ; GFX9: [[FMAXNUM_IEEE:%[0-9]+]]:_(<2 x s16>) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] + ; GFX9: [[FMAXNUM_IEEE:%[0-9]+]]:_(<2 x s16>) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[BUILD_VECTOR_TRUNC]] ; GFX9: $vgpr0 = COPY [[FMAXNUM_IEEE]](<2 x s16>) %0:_(<2 x s16>) = COPY $vgpr0 %1:_(s16) = G_FCONSTANT half 0xH0000 Index: llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fminnum.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fminnum.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fminnum.mir @@ -793,22 +793,19 @@ ; SI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; SI: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 ; SI: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] - ; SI: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[C]] - ; SI: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] + ; SI: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[C]] ; SI: $vgpr0 = COPY [[FMINNUM_IEEE]](s32) ; VI-LABEL: name: test_fminnum_with_constant_argument_s32_ieee_mode_on ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; VI: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 ; VI: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] - ; VI: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[C]] - ; VI: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] + ; VI: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[C]] ; VI: $vgpr0 = COPY [[FMINNUM_IEEE]](s32) ; GFX9-LABEL: name: test_fminnum_with_constant_argument_s32_ieee_mode_on ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 ; GFX9: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] - ; GFX9: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[C]] - ; GFX9: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] + ; GFX9: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[C]] ; GFX9: $vgpr0 = COPY [[FMINNUM_IEEE]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = G_FCONSTANT float 0.000000e+00 @@ -873,8 +870,7 @@ ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[C]](s16) ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT]](s32) ; GFX9: [[FCANONICALIZE:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[COPY]] - ; GFX9: [[FCANONICALIZE1:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[BUILD_VECTOR_TRUNC]] - ; GFX9: [[FMINNUM_IEEE:%[0-9]+]]:_(<2 x s16>) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] + ; GFX9: [[FMINNUM_IEEE:%[0-9]+]]:_(<2 x s16>) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[BUILD_VECTOR_TRUNC]] ; GFX9: $vgpr0 = COPY [[FMINNUM_IEEE]](<2 x s16>) %0:_(<2 x s16>) = COPY $vgpr0 %1:_(s16) = G_FCONSTANT half 0xH0000