Index: llvm/lib/Target/AMDGPU/AMDGPU.td =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPU.td +++ llvm/lib/Target/AMDGPU/AMDGPU.td @@ -1862,6 +1862,9 @@ def HasGWS : Predicate<"Subtarget->hasGWS()">; +def HasCvtFP8VOP1Bug : Predicate<"Subtarget->hasCvtFP8VOP1Bug()">; +def HasNoCvtFP8VOP1Bug : Predicate<"!Subtarget->hasCvtFP8VOP1Bug()">; + // Include AMDGPU TD files include "SISchedule.td" include "GCNProcessors.td" Index: llvm/lib/Target/AMDGPU/GCNSubtarget.h =================================================================== --- llvm/lib/Target/AMDGPU/GCNSubtarget.h +++ llvm/lib/Target/AMDGPU/GCNSubtarget.h @@ -1172,6 +1172,9 @@ // \returns true if the target supports the pre-NGG legacy geometry path. bool hasLegacyGeometry() const { return getGeneration() < GFX11; } + // \returns true if FP8/BF8 VOP1 form of conversion to F32 is unreliable. + bool hasCvtFP8VOP1Bug() const { return true; } + /// \returns SGPR allocation granularity supported by the subtarget. unsigned getSGPRAllocGranule() const { return AMDGPU::IsaInfo::getSGPRAllocGranule(this); Index: llvm/lib/Target/AMDGPU/VOP1Instructions.td =================================================================== --- llvm/lib/Target/AMDGPU/VOP1Instructions.td +++ llvm/lib/Target/AMDGPU/VOP1Instructions.td @@ -591,7 +591,21 @@ (inst_e32 $src)) >; -foreach Index = [0, 1, 2, 3] in { +let OtherPredicates = [HasCvtFP8VOP1Bug] in { + def : GCNPat<(f32 (int_amdgcn_cvt_f32_fp8 i32:$src, 0)), + (V_CVT_F32_FP8_sdwa 0, $src, 0, 0, 0)>; + def : GCNPat<(f32 (int_amdgcn_cvt_f32_bf8 i32:$src, 0)), + (V_CVT_F32_BF8_sdwa 0, $src, 0, 0, 0)>; +} + +let OtherPredicates = [HasNoCvtFP8VOP1Bug] in { + def : GCNPat<(f32 (int_amdgcn_cvt_f32_fp8 i32:$src, 0)), + (V_CVT_F32_FP8_e32 $src)>; + def : GCNPat<(f32 (int_amdgcn_cvt_f32_bf8 i32:$src, 0)), + (V_CVT_F32_BF8_e32 $src)>; +} + +foreach Index = [1, 2, 3] in { def : Cvt_F32_F8_Pat; def : Cvt_F32_F8_Pat