diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def b/clang/include/clang/Basic/BuiltinsAMDGPU.def
--- a/clang/include/clang/Basic/BuiltinsAMDGPU.def
+++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def
@@ -346,5 +346,14 @@
 TARGET_BUILTIN(__builtin_amdgcn_smfmac_i32_16x16x64_i8, "V4iV2iV4iV4iiIiIi", "nc", "mai-insts")
 TARGET_BUILTIN(__builtin_amdgcn_smfmac_i32_32x32x32_i8, "V16iV2iV4iV16iiIiIi", "nc", "mai-insts")
 
+TARGET_BUILTIN(__builtin_amdgcn_cvt_f32_bf8, "fiIi", "nc", "fp8-insts")
+TARGET_BUILTIN(__builtin_amdgcn_cvt_f32_fp8, "fiIi", "nc", "fp8-insts")
+TARGET_BUILTIN(__builtin_amdgcn_cvt_pk_f32_bf8, "V2fiIb", "nc", "fp8-insts")
+TARGET_BUILTIN(__builtin_amdgcn_cvt_pk_f32_fp8, "V2fiIb", "nc", "fp8-insts")
+TARGET_BUILTIN(__builtin_amdgcn_cvt_pk_bf8_f32, "iffiIb", "nc", "fp8-insts")
+TARGET_BUILTIN(__builtin_amdgcn_cvt_pk_fp8_f32, "iffiIb", "nc", "fp8-insts")
+TARGET_BUILTIN(__builtin_amdgcn_cvt_sr_bf8_f32, "ifiiIi", "nc", "fp8-insts")
+TARGET_BUILTIN(__builtin_amdgcn_cvt_sr_fp8_f32, "ifiiIi", "nc", "fp8-insts")
+
 #undef BUILTIN
 #undef TARGET_BUILTIN
diff --git a/clang/lib/Basic/Targets/AMDGPU.cpp b/clang/lib/Basic/Targets/AMDGPU.cpp
--- a/clang/lib/Basic/Targets/AMDGPU.cpp
+++ b/clang/lib/Basic/Targets/AMDGPU.cpp
@@ -250,6 +250,7 @@
       break;
     case GK_GFX940:
       Features["gfx940-insts"] = true;
+      Features["fp8-insts"] = true;
       LLVM_FALLTHROUGH;
     case GK_GFX90A:
       Features["gfx90a-insts"] = true;
diff --git a/clang/test/CodeGenOpenCL/amdgpu-features.cl b/clang/test/CodeGenOpenCL/amdgpu-features.cl
--- a/clang/test/CodeGenOpenCL/amdgpu-features.cl
+++ b/clang/test/CodeGenOpenCL/amdgpu-features.cl
@@ -64,7 +64,7 @@
 // GFX909: "target-features"="+16-bit-insts,+ci-insts,+dpp,+flat-address-space,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst"
 // GFX90A: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+flat-address-space,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+mai-insts,+s-memrealtime,+s-memtime-inst"
 // GFX90C: "target-features"="+16-bit-insts,+ci-insts,+dpp,+flat-address-space,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst"
-// GFX940: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+flat-address-space,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+mai-insts,+s-memrealtime,+s-memtime-inst"
+// GFX940: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+flat-address-space,+fp8-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+mai-insts,+s-memrealtime,+s-memtime-inst"
 // GFX1010: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dpp,+flat-address-space,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst"
 // GFX1011: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+flat-address-space,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst"
 // GFX1012: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+flat-address-space,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst"
diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-fp8.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-fp8.cl
new file mode 100644
--- /dev/null
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-fp8.cl
@@ -0,0 +1,60 @@
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx940 -S -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-GFX940
+
+typedef float v2f __attribute__((ext_vector_type(2)));
+
+// CHECK-GFX940-LABEL: @test_cvt_f32_bf8
+// CHECK-GFX940: call float @llvm.amdgcn.cvt.f32.bf8(i32 %a, i32 0)
+void test_cvt_f32_bf8(global float* out, int a)
+{
+  *out = __builtin_amdgcn_cvt_f32_bf8(a, 0);
+}
+
+// CHECK-GFX940-LABEL: @test_cvt_f32_fp8
+// CHECK-GFX940: call float @llvm.amdgcn.cvt.f32.fp8(i32 %a, i32 1)
+void test_cvt_f32_fp8(global float* out, int a)
+{
+  *out = __builtin_amdgcn_cvt_f32_fp8(a, 1);
+}
+
+// CHECK-GFX940-LABEL: @test_cvt_pk_f32_bf8
+// CHECK-GFX940: call <2 x float> @llvm.amdgcn.cvt.pk.f32.bf8(i32 %a, i1 false)
+void test_cvt_pk_f32_bf8(global v2f* out, int a)
+{
+  *out = __builtin_amdgcn_cvt_pk_f32_bf8(a, false);
+}
+
+// CHECK-GFX940-LABEL: @test_cvt_pk_f32_fp8
+// CHECK-GFX940: call <2 x float> @llvm.amdgcn.cvt.pk.f32.fp8(i32 %a, i1 true)
+void test_cvt_pk_f32_fp8(global v2f* out, int a)
+{
+  *out = __builtin_amdgcn_cvt_pk_f32_fp8(a, true);
+}
+
+// CHECK-GFX940-LABEL: @test_cvt_pk_bf8_f32
+// CHECK-GFX940: call i32 @llvm.amdgcn.cvt.pk.bf8.f32(float %a, float %b, i32 %old, i1 false)
+void test_cvt_pk_bf8_f32(global int* out, int old, float a, float b)
+{
+  *out = __builtin_amdgcn_cvt_pk_bf8_f32(a, b, old, false);
+}
+
+// CHECK-GFX940-LABEL: @test_cvt_pk_fp8_f32
+// CHECK-GFX940: call i32 @llvm.amdgcn.cvt.pk.fp8.f32(float %a, float %b, i32 %old, i1 true)
+void test_cvt_pk_fp8_f32(global int* out, int old, float a, float b)
+{
+  *out = __builtin_amdgcn_cvt_pk_fp8_f32(a, b, old, true);
+}
+
+// CHECK-GFX940-LABEL: @test_cvt_sr_bf8_f32
+// CHECK-GFX940: call i32 @llvm.amdgcn.cvt.sr.bf8.f32(float %a, i32 %b, i32 %old, i32 2)
+void test_cvt_sr_bf8_f32(global int* out, int old, float a, int b)
+{
+  *out = __builtin_amdgcn_cvt_sr_bf8_f32(a, b, old, 2);
+}
+
+// CHECK-GFX940-LABEL: @test_cvt_sr_fp8_f32
+// CHECK-GFX940: call i32 @llvm.amdgcn.cvt.sr.fp8.f32(float %a, i32 %b, i32 %old, i32 3)
+void test_cvt_sr_fp8_f32(global int* out, int old, float a, int b)
+{
+  *out = __builtin_amdgcn_cvt_sr_fp8_f32(a, b, old, 3);
+}
diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
--- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
+++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
@@ -2320,6 +2320,58 @@
 def int_amdgcn_smfmac_i32_16x16x64_i8 : AMDGPUMSmfmacIntrinsic<llvm_v4i32_ty, llvm_v2i32_ty, llvm_v4i32_ty>;
 def int_amdgcn_smfmac_i32_32x32x32_i8 : AMDGPUMSmfmacIntrinsic<llvm_v16i32_ty, llvm_v2i32_ty, llvm_v4i32_ty>;
 
+// llvm.amdgcn.cvt.f32.bf8 float vdst, int srcA, imm byte_sel [0..3]
+// byte_sel selects byte from srcA.
+def int_amdgcn_cvt_f32_bf8 : ClangBuiltin<"__builtin_amdgcn_cvt_f32_bf8">,
+  Intrinsic<[llvm_float_ty],
+            [llvm_i32_ty, llvm_i32_ty],
+            [IntrNoMem, IntrWillReturn, ImmArg<ArgIndex<1>>]>;
+
+// llvm.amdgcn.cvt.f32.fp8 float vdst, int srcA, imm byte_sel [0..3]
+def int_amdgcn_cvt_f32_fp8 : ClangBuiltin<"__builtin_amdgcn_cvt_f32_fp8">,
+  Intrinsic<[llvm_float_ty],
+            [llvm_i32_ty, llvm_i32_ty],
+            [IntrNoMem, IntrWillReturn, ImmArg<ArgIndex<1>>]>;
+
+// llvm.amdgcn.cvt.pk.f32.bf8 float2 vdst, int srcA, imm word_sel
+// word_sel = 1 selects 2 high bytes, 0 selects 2 low bytes.
+def int_amdgcn_cvt_pk_f32_bf8 : ClangBuiltin<"__builtin_amdgcn_cvt_pk_f32_bf8">,
+  Intrinsic<[llvm_v2f32_ty],
+            [llvm_i32_ty, llvm_i1_ty],
+            [IntrNoMem, IntrWillReturn, ImmArg<ArgIndex<1>>]>;
+
+// llvm.amdgcn.cvt.pk.f32.fp8 float2 vdst, int srcA, imm word_sel.
+def int_amdgcn_cvt_pk_f32_fp8 : ClangBuiltin<"__builtin_amdgcn_cvt_pk_f32_fp8">,
+  Intrinsic<[llvm_v2f32_ty],
+            [llvm_i32_ty, llvm_i1_ty],
+            [IntrNoMem, IntrWillReturn, ImmArg<ArgIndex<1>>]>;
+
+// llvm.amdgcn.cvt.pk.bf8.f32 int vdst, float srcA, float srcB, int old, imm word_sel
+// word_sel = 1 selects 2 high bytes in the vdst, 0 selects 2 low bytes.
+def int_amdgcn_cvt_pk_bf8_f32 : ClangBuiltin<"__builtin_amdgcn_cvt_pk_bf8_f32">,
+  Intrinsic<[llvm_i32_ty],
+            [llvm_float_ty, llvm_float_ty, llvm_i32_ty, llvm_i1_ty],
+            [IntrNoMem, IntrWillReturn, ImmArg<ArgIndex<3>>]>;
+
+// llvm.amdgcn.cvt.pk.fp8.f32 int vdst, float srcA, float srcB, int old, imm word_sel
+def int_amdgcn_cvt_pk_fp8_f32 : ClangBuiltin<"__builtin_amdgcn_cvt_pk_fp8_f32">,
+  Intrinsic<[llvm_i32_ty],
+            [llvm_float_ty, llvm_float_ty, llvm_i32_ty, llvm_i1_ty],
+            [IntrNoMem, IntrWillReturn, ImmArg<ArgIndex<3>>]>;
+
+// llvm.amdgcn.cvt.sr.bf8.f32 int vdst, float srcA, int srcB, int old, imm byte_sel [0..3]
+// byte_sel selects byte to write into vdst.
+def int_amdgcn_cvt_sr_bf8_f32 : ClangBuiltin<"__builtin_amdgcn_cvt_sr_bf8_f32">,
+  Intrinsic<[llvm_i32_ty],
+            [llvm_float_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
+            [IntrNoMem, IntrWillReturn, ImmArg<ArgIndex<3>>]>;
+
+// llvm.amdgcn.cvt.sr.fp8.f32 int vdst, float srcA, int srcB, int old, imm byte_sel [0..3]
+def int_amdgcn_cvt_sr_fp8_f32 : ClangBuiltin<"__builtin_amdgcn_cvt_sr_fp8_f32">,
+  Intrinsic<[llvm_i32_ty],
+            [llvm_float_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
+            [IntrNoMem, IntrWillReturn, ImmArg<ArgIndex<3>>]>;
+
 //===----------------------------------------------------------------------===//
 // Special Intrinsics for backend internal use only. No frontend
 // should emit calls to these.
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td
--- a/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -585,6 +585,12 @@
   "Has mAI instructions"
 >;
 
+def FeatureFP8Insts : SubtargetFeature<"fp8-insts",
+  "HasFP8Insts",
+  "true",
+  "Has fp8 and bf8 instructions"
+>;
+
 def FeaturePkFmacF16Inst : SubtargetFeature<"pk-fmac-f16-inst",
   "HasPkFmacF16Inst",
   "true",
@@ -1124,6 +1130,7 @@
    Feature64BitDPP,
    FeaturePackedFP32Ops,
    FeatureMAIInsts,
+   FeatureFP8Insts,
    FeaturePkFmacF16Inst,
    FeatureAtomicFaddRtnInsts,
    FeatureAtomicFaddNoRtnInsts,
@@ -1704,6 +1711,9 @@
 def HasShaderCyclesRegister : Predicate<"Subtarget->hasShaderCyclesRegister()">,
   AssemblerPredicate<(all_of FeatureShaderCyclesRegister)>;
 
+def HasFP8Insts : Predicate<"Subtarget->hasFP8Insts()">,
+  AssemblerPredicate<(all_of FeatureFP8Insts)>;
+
 def HasPkFmacF16Inst : Predicate<"Subtarget->hasPkFmacF16Inst()">,
   AssemblerPredicate<(all_of FeaturePkFmacF16Inst)>;
 
diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
--- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -8257,6 +8257,12 @@
 
   const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0;
 
+  if (Opc == AMDGPU::V_CVT_SR_BF8_F32_vi ||
+      Opc == AMDGPU::V_CVT_SR_FP8_F32_vi) {
+    Inst.addOperand(MCOperand::createImm(0)); // Placeholder for src2_mods
+    Inst.addOperand(Inst.getOperand(0));
+  }
+
   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) {
     assert(!IsPacked);
     Inst.addOperand(Inst.getOperand(0));
@@ -9061,12 +9067,27 @@
     // v_nop_sdwa_sdwa_vi/gfx9 has no optional sdwa arguments
     switch (BasicInstType) {
     case SIInstrFlags::VOP1:
-      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
-      if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
-        addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
+      if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
+                                     AMDGPU::OpName::clamp) != -1) {
+        addOptionalImmOperand(Inst, Operands, OptionalIdx,
+                              AMDGPUOperand::ImmTyClampSI, 0);
+      }
+      if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
+                                     AMDGPU::OpName::omod) != -1) {
+        addOptionalImmOperand(Inst, Operands, OptionalIdx,
+                              AMDGPUOperand::ImmTyOModSI, 0);
+      }
+      if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
+                                     AMDGPU::OpName::dst_sel) != -1) {
+        addOptionalImmOperand(Inst, Operands, OptionalIdx,
+                              AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
+      }
+      if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
+                                     AMDGPU::OpName::dst_unused) != -1) {
+        addOptionalImmOperand(Inst, Operands, OptionalIdx,
+                              AMDGPUOperand::ImmTySdwaDstUnused,
+                              DstUnused::UNUSED_PRESERVE);
       }
-      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
-      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
       break;
 
diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
--- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
@@ -145,6 +145,7 @@
   bool HasDot7Insts = false;
   bool HasDot8Insts = false;
   bool HasMAIInsts = false;
+  bool HasFP8Insts = false;
   bool HasPkFmacF16Inst = false;
   bool HasAtomicFaddRtnInsts = false;
   bool HasAtomicFaddNoRtnInsts = false;
@@ -721,6 +722,10 @@
     return HasMAIInsts;
   }
 
+  bool hasFP8Insts() const {
+    return HasFP8Insts;
+  }
+
   bool hasPkFmacF16Inst() const {
     return HasPkFmacF16Inst;
   }
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
@@ -324,7 +324,8 @@
 
 // XXX - do v2i16 instructions?
class isIntType { - bit ret = !or(!eq(SrcVT.Value, i16.Value), + bit ret = !or(!eq(SrcVT.Value, i8.Value), + !eq(SrcVT.Value, i16.Value), !eq(SrcVT.Value, i32.Value), !eq(SrcVT.Value, i64.Value), !eq(SrcVT.Value, v4i16.Value), @@ -1411,6 +1412,10 @@ def Int16SDWAInputModsMatchClass : IntSDWAInputModsMatchClass<16>; def Int32SDWAInputModsMatchClass : IntSDWAInputModsMatchClass<32>; +def Bin32SDWAInputModsMatchClass : IntSDWAInputModsMatchClass<32> { + let Name = "SDWAWithBin32InputMods"; + let ParserMethod = "parseRegOrImm"; +} class IntSDWAInputMods : InputMods { @@ -1419,6 +1424,7 @@ def Int16SDWAInputMods : IntSDWAInputMods; def Int32SDWAInputMods : IntSDWAInputMods; +def Bin32SDWAInputMods : IntSDWAInputMods; def IntVRegInputModsMatchClass : AsmOperandClass { let Name = "VRegWithIntInputMods"; diff --git a/llvm/lib/Target/AMDGPU/VOP1Instructions.td b/llvm/lib/Target/AMDGPU/VOP1Instructions.td --- a/llvm/lib/Target/AMDGPU/VOP1Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP1Instructions.td @@ -499,6 +499,59 @@ defm V_SCREEN_PARTITION_4SE_B32 : VOP1Inst <"v_screen_partition_4se_b32", VOP_I32_I32>; } // End SubtargetPredicate = isGFX9Only +class VOPProfile_Base_CVT_F32_F8 : VOPProfileI2F { + let HasExtSDWA = 1; + let HasExtSDWA9 = 1; + let HasExt = 1; + let DstRCSDWA = getVALUDstForVT.ret; + let InsSDWA = (ins Bin32SDWAInputMods:$src0_modifiers, Src0SDWA:$src0, + clampmod:$clamp, omod:$omod, src0_sel:$src0_sel); + let AsmSDWA = "$vdst, $src0_modifiers$clamp$omod $src0_sel"; // No dst_sel + let AsmSDWA9 = AsmSDWA; + let EmitDstSel = 0; +} + +def VOPProfileCVT_F32_F8 : VOPProfile_Base_CVT_F32_F8 ; +def VOPProfileCVT_PK_F32_F8 : VOPProfile_Base_CVT_F32_F8 ; + +let SubtargetPredicate = HasFP8Insts, mayRaiseFPException = 0, + SchedRW = [WriteFloatCvt] in { + defm V_CVT_F32_FP8 : VOP1Inst<"v_cvt_f32_fp8", VOPProfileCVT_F32_F8>; + defm V_CVT_F32_BF8 : VOP1Inst<"v_cvt_f32_bf8", VOPProfileCVT_F32_F8>; + defm V_CVT_PK_F32_FP8 : VOP1Inst<"v_cvt_pk_f32_fp8", 
VOPProfileCVT_PK_F32_F8>; + defm V_CVT_PK_F32_BF8 : VOP1Inst<"v_cvt_pk_f32_bf8", VOPProfileCVT_PK_F32_F8>; +} + +class Cvt_F32_F8_Pat : GCNPat< + (f32 (node i32:$src, index)), + !if (index, + (inst_sdwa 0, $src, 0, 0, index), + (inst_e32 $src)) +>; + +foreach Index = [0, 1, 2, 3] in { + def : Cvt_F32_F8_Pat; + def : Cvt_F32_F8_Pat; +} + +class Cvt_PK_F32_F8_Pat : GCNPat< + (v2f32 (node i32:$src, index)), + !if (index, + (inst_sdwa 0, $src, 0, 0, SDWA.WORD_1), + (inst_e32 $src)) +>; + +foreach Index = [0, -1] in { + def : Cvt_PK_F32_F8_Pat; + def : Cvt_PK_F32_F8_Pat; +} + let SubtargetPredicate = isGFX10Plus in { defm V_PIPEFLUSH : VOP1Inst<"v_pipeflush", VOP_NO_EXT>; @@ -1106,11 +1159,36 @@ } +multiclass VOP1_Real_NoDstSel_SDWA_gfx9 op> { + let AssemblerPredicate = isGFX9Only, DecoderNamespace = "GFX9" in { + defm NAME : VOP1_Real_e32e64_vi ; + } + + foreach _ = BoolToList(NAME#"_e32").Pfl.HasExtSDWA9>.ret in + def _sdwa_gfx9 : + VOP_SDWA9_Real (NAME#"_sdwa")>, + VOP1_SDWA9Ae (NAME#"_sdwa").Pfl> { + let Inst{42-40} = 6; + } + + foreach _ = BoolToList(NAME#"_e32").Pfl.HasExtDPP>.ret in + def _dpp_gfx9 : + VOP_DPP_Real(NAME#"_dpp"), SIEncodingFamily.GFX9>, + VOP1_DPPe(NAME#"_dpp")>; +} + defm V_SCREEN_PARTITION_4SE_B32 : VOP1_Real_gfx9 <0x37>; let AssemblerPredicate = isGFX940Plus, DecoderNamespace = "GFX9" in defm V_MOV_B64 : VOP1_Real_gfx9 <0x38>; +let OtherPredicates = [HasFP8Insts] in { +defm V_CVT_F32_FP8 : VOP1_Real_NoDstSel_SDWA_gfx9<0x54>; +defm V_CVT_F32_BF8 : VOP1_Real_NoDstSel_SDWA_gfx9<0x55>; +defm V_CVT_PK_F32_FP8 : VOP1_Real_NoDstSel_SDWA_gfx9<0x56>; +defm V_CVT_PK_F32_BF8 : VOP1_Real_NoDstSel_SDWA_gfx9<0x57>; +} + //===----------------------------------------------------------------------===// // GFX10 //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td --- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td +++ 
b/llvm/lib/Target/AMDGPU/VOP3Instructions.td @@ -481,6 +481,30 @@ }]; } +def VOP3_CVT_PK_F8_F32_Profile : VOP3_Profile { + let InsVOP3OpSel = (ins FP32InputMods:$src0_modifiers, Src0RC64:$src0, + FP32InputMods:$src1_modifiers, Src1RC64:$src1, + VGPR_32:$vdst_in, op_sel0:$op_sel); + let HasClamp = 0; + let HasExtVOP3DPP = 0; +} + +def VOP3_CVT_SR_F8_F32_Profile : VOP3_Profile, + VOP3_OPSEL> { + let InsVOP3OpSel = (ins FP32InputMods:$src0_modifiers, Src0RC64:$src0, + FP32InputMods:$src1_modifiers, Src1RC64:$src1, + FP32InputMods:$src2_modifiers, VGPR_32:$src2, + op_sel0:$op_sel); + let HasClamp = 0; + let HasSrc2 = 0; + let HasSrc2Mods = 1; + let AsmVOP3OpSel = !subst(", $src2_modifiers", "", + getAsmVOP3OpSel<3, HasClamp, + HasSrc0FloatMods, HasSrc1FloatMods, + HasSrc2FloatMods>.ret); + let HasExtVOP3DPP = 0; +} + let SubtargetPredicate = isGFX9Plus in { let isCommutable = 1, isReMaterializable = 1 in { defm V_ADD3_U32 : VOP3Inst <"v_add3_u32", VOP3_Profile>; @@ -526,6 +550,43 @@ let SubtargetPredicate = isGFX940Plus in defm V_LSHL_ADD_U64 : VOP3Inst <"v_lshl_add_u64", VOP3_Profile>; +let SubtargetPredicate = HasFP8Insts, mayRaiseFPException = 0, + SchedRW = [WriteFloatCvt] in { + let Constraints = "$vdst = $vdst_in", DisableEncoding = "$vdst_in" in { + defm V_CVT_PK_FP8_F32 : VOP3Inst<"v_cvt_pk_fp8_f32", VOP3_CVT_PK_F8_F32_Profile>; + defm V_CVT_PK_BF8_F32 : VOP3Inst<"v_cvt_pk_bf8_f32", VOP3_CVT_PK_F8_F32_Profile>; + } + + // These instructions have non-standard use of op_sel. In particular they are + // using op_sel bits 2 and 3 while only having two sources. Therefore dummy + // src2 is used to hold the op_sel value. 
+  let Constraints = "$vdst = $src2", DisableEncoding = "$src2" in {
+    defm V_CVT_SR_FP8_F32 : VOP3Inst<"v_cvt_sr_fp8_f32", VOP3_CVT_SR_F8_F32_Profile>;
+    defm V_CVT_SR_BF8_F32 : VOP3Inst<"v_cvt_sr_bf8_f32", VOP3_CVT_SR_F8_F32_Profile>;
+  }
+}
+
+class Cvt_PK_F8_F32_Pat<SDPatternOperator node, int index, VOP3_Pseudo inst> : GCNPat<
+    (i32 (node f32:$src0, f32:$src1, i32:$old, index)),
+    (inst !if(index, SRCMODS.DST_OP_SEL, 0), $src0, 0, $src1, $old, !if(index, SRCMODS.OP_SEL_0, 0))
+>;
+
+class Cvt_SR_F8_F32_Pat<SDPatternOperator node, bits<2> index, VOP3_Pseudo inst> : GCNPat<
+    (i32 (node f32:$src0, i32:$src1, i32:$old, index)),
+    (inst !if(index{1}, SRCMODS.DST_OP_SEL, 0), $src0, 0, $src1,
+          !if(index{0}, SRCMODS.OP_SEL_0, 0), $old, !if(index{1}, SRCMODS.OP_SEL_0, 0))
+>;
+
+foreach Index = [0, -1] in {
+  def : Cvt_PK_F8_F32_Pat<int_amdgcn_cvt_pk_fp8_f32, Index, V_CVT_PK_FP8_F32_e64>;
+  def : Cvt_PK_F8_F32_Pat<int_amdgcn_cvt_pk_bf8_f32, Index, V_CVT_PK_BF8_F32_e64>;
+}
+
+foreach Index = [0, 1, 2, 3] in {
+  def : Cvt_SR_F8_F32_Pat<int_amdgcn_cvt_sr_fp8_f32, Index, V_CVT_SR_FP8_F32_e64>;
+  def : Cvt_SR_F8_F32_Pat<int_amdgcn_cvt_sr_bf8_f32, Index, V_CVT_SR_BF8_F32_e64>;
+}
+
 class ThreeOp_i32_Pats <SDPatternOperator op1, SDPatternOperator op2, Instruction inst> : GCNPat <
   // This matches (op2 (op1 i32:$src0, i32:$src1), i32:$src2) with conditions.
   (ThreeOpFrag<op1, op2> i32:$src0, i32:$src1, i32:$src2),
@@ -1161,6 +1222,13 @@
             VOP3OpSel_gfx9 <op, !cast<VOP_Pseudo>(NAME#"_e64").Pfl>;
 }
 
+multiclass VOP3OpSel_Real_gfx9_forced_opsel2<bits<10> op> {
+  def _vi : VOP3_Real<!cast<VOP_Pseudo>(NAME#"_e64"), SIEncodingFamily.VI>,
+            VOP3OpSel_gfx9 <op, !cast<VOP_Pseudo>(NAME#"_e64").Pfl> {
+    let Inst{13} = src2_modifiers{2}; // op_sel(2)
+  }
+}
+
 multiclass VOP3Interp_Real_vi<bits<10> op> {
   def _vi : VOP3_Real<!cast<VOP_Pseudo>(NAME), SIEncodingFamily.VI>,
             VOP3Interp_vi <op, !cast<VOP_Pseudo>(NAME).Pfl>;
@@ -1352,3 +1420,10 @@
 defm V_CVT_PKNORM_U16_F16 : VOP3OpSel_Real_gfx9 <0x29a>;
 
 defm V_LSHL_ADD_U64 : VOP3_Real_vi <0x208>;
+
+let OtherPredicates = [HasFP8Insts] in {
+defm V_CVT_PK_FP8_F32 : VOP3OpSel_Real_gfx9 <0x2a2>;
+defm V_CVT_PK_BF8_F32 : VOP3OpSel_Real_gfx9 <0x2a3>;
+defm V_CVT_SR_FP8_F32 : VOP3OpSel_Real_gfx9_forced_opsel2 <0x2a4>;
+defm V_CVT_SR_BF8_F32 : VOP3OpSel_Real_gfx9_forced_opsel2 <0x2a5>;
+}
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.fp8.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.fp8.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.fp8.ll
@@ -0,0 +1,190 @@
+; RUN: llc -march=amdgcn -mcpu=gfx940 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
+
+declare float @llvm.amdgcn.cvt.f32.bf8(i32, i32)
+declare float @llvm.amdgcn.cvt.f32.fp8(i32, i32)
+declare <2 x float> @llvm.amdgcn.cvt.pk.f32.bf8(i32, i1)
+declare <2 x float> @llvm.amdgcn.cvt.pk.f32.fp8(i32, i1)
+declare i32 @llvm.amdgcn.cvt.pk.bf8.f32(float, float, i32, i1)
+declare i32 @llvm.amdgcn.cvt.pk.fp8.f32(float, float, i32, i1)
+declare i32 @llvm.amdgcn.cvt.sr.bf8.f32(float, i32, i32, i32)
+declare i32 @llvm.amdgcn.cvt.sr.fp8.f32(float, i32, i32, i32)
+
+; GCN-LABEL: {{^}}test_cvt_f32_bf8_byte0:
+; GCN: v_cvt_f32_bf8_e32 v0, v0{{$}}
+define float @test_cvt_f32_bf8_byte0(i32 %a) {
+  %ret = tail call float @llvm.amdgcn.cvt.f32.bf8(i32 %a, i32 0)
+  ret float %ret
+}
+
+; GCN-LABEL: {{^}}test_cvt_f32_bf8_byte1:
+; GCN: v_cvt_f32_bf8_sdwa v0, v0 src0_sel:BYTE_1
+define float @test_cvt_f32_bf8_byte1(i32 %a) {
+  %ret = tail call float @llvm.amdgcn.cvt.f32.bf8(i32 %a, i32 1)
+  ret float %ret
+}
+
+; GCN-LABEL: {{^}}test_cvt_f32_bf8_byte2:
+; GCN: v_cvt_f32_bf8_sdwa v0, v0 src0_sel:BYTE_2
+define float @test_cvt_f32_bf8_byte2(i32 %a) {
+  %ret = tail call float @llvm.amdgcn.cvt.f32.bf8(i32 %a, i32 2)
+  ret float %ret
+}
+
+; GCN-LABEL: {{^}}test_cvt_f32_bf8_byte3:
+; GCN: v_cvt_f32_bf8_sdwa v0, v0 src0_sel:BYTE_3
+define float @test_cvt_f32_bf8_byte3(i32 %a) {
+  %ret = tail call float @llvm.amdgcn.cvt.f32.bf8(i32 %a, i32 3)
+  ret float %ret
+}
+
+; GCN-LABEL: {{^}}test_cvt_f32_fp8_byte0:
+; GCN: v_cvt_f32_fp8_e32 v0, v0{{$}}
+define float @test_cvt_f32_fp8_byte0(i32 %a) {
+  %ret = tail call float @llvm.amdgcn.cvt.f32.fp8(i32 %a, i32 0)
+  ret float %ret
+}
+
+; GCN-LABEL: {{^}}test_cvt_f32_fp8_byte1:
+; GCN: v_cvt_f32_fp8_sdwa v0, v0 src0_sel:BYTE_1
+define float @test_cvt_f32_fp8_byte1(i32 %a) {
+  %ret = tail call float @llvm.amdgcn.cvt.f32.fp8(i32 %a, i32 1)
+  ret float %ret
+}
+
+; GCN-LABEL: {{^}}test_cvt_f32_fp8_byte2:
+; GCN: v_cvt_f32_fp8_sdwa v0, v0 src0_sel:BYTE_2
+define float @test_cvt_f32_fp8_byte2(i32 %a) {
+  %ret = tail call float @llvm.amdgcn.cvt.f32.fp8(i32 %a, i32 2)
+  ret float %ret
+}
+
+; GCN-LABEL: {{^}}test_cvt_f32_fp8_byte3:
+; GCN: v_cvt_f32_fp8_sdwa v0, v0 src0_sel:BYTE_3
+define float @test_cvt_f32_fp8_byte3(i32 %a) {
+  %ret = tail call float @llvm.amdgcn.cvt.f32.fp8(i32 %a, i32 3)
+  ret float %ret
+}
+
+; GCN-LABEL: {{^}}test_cvt_pk_f32_bf8_word0:
+; GCN: v_cvt_pk_f32_bf8_e32 v[0:1], v0{{$}}
+define <2 x float> @test_cvt_pk_f32_bf8_word0(i32 %a) {
+  %ret = tail call <2 x float> @llvm.amdgcn.cvt.pk.f32.bf8(i32 %a, i1 false)
+  ret <2 x float> %ret
+}
+
+; GCN-LABEL: {{^}}test_cvt_pk_f32_bf8_word1:
+; GCN: v_cvt_pk_f32_bf8_sdwa v[0:1], v0 src0_sel:WORD_1
+define <2 x float> @test_cvt_pk_f32_bf8_word1(i32 %a) {
+  %ret = tail call <2 x float> @llvm.amdgcn.cvt.pk.f32.bf8(i32 %a, i1 true)
+  ret <2 x float> %ret
+}
+
+; GCN-LABEL: {{^}}test_cvt_pk_f32_fp8_word0:
+; GCN: v_cvt_pk_f32_fp8_e32 v[0:1], v0{{$}}
+define <2 x float> @test_cvt_pk_f32_fp8_word0(i32 %a) {
+  %ret = tail call <2 x float> @llvm.amdgcn.cvt.pk.f32.fp8(i32 %a, i1 false)
+  ret <2 x float> %ret
+}
+
+; GCN-LABEL: {{^}}test_cvt_pk_f32_fp8_word1:
+; GCN: v_cvt_pk_f32_fp8_sdwa v[0:1], v0 src0_sel:WORD_1
+define <2 x float> @test_cvt_pk_f32_fp8_word1(i32 %a) {
+  %ret = tail call <2 x float> @llvm.amdgcn.cvt.pk.f32.fp8(i32 %a, i1 true)
+  ret <2 x float> %ret
+}
+
+; GCN-LABEL: {{^}}test_cvt_pk_bf8_f32_word0:
+; GCN: v_cvt_pk_bf8_f32 v2, v0, v1{{$}}
+; GCN: v_mov_b32_e32 v0, v2
+define i32 @test_cvt_pk_bf8_f32_word0(float %x, float %y, i32 %old) {
+  %ret = tail call i32 @llvm.amdgcn.cvt.pk.bf8.f32(float %x, float %y, i32 %old, i1 false)
+  ret i32 %ret
+}
+
+; GCN-LABEL: {{^}}test_cvt_pk_bf8_f32_word1:
+; GCN: v_cvt_pk_bf8_f32 v2, v0, v1 op_sel:[0,0,1]
+; GCN: v_mov_b32_e32 v0, v2
+define i32 @test_cvt_pk_bf8_f32_word1(float %x, float %y, i32 %old) {
+  %ret = tail call i32 @llvm.amdgcn.cvt.pk.bf8.f32(float %x, float %y, i32 %old, i1 true)
+  ret i32 %ret
+}
+
+; GCN-LABEL: {{^}}test_cvt_pk_fp8_f32_word0:
+; GCN: v_cvt_pk_fp8_f32 v2, v0, v1{{$}}
+; GCN: v_mov_b32_e32 v0, v2
+define i32 @test_cvt_pk_fp8_f32_word0(float %x, float %y, i32 %old) {
+  %ret = tail call i32 @llvm.amdgcn.cvt.pk.fp8.f32(float %x, float %y, i32 %old, i1 false)
+  ret i32 %ret
+}
+
+; GCN-LABEL: {{^}}test_cvt_pk_fp8_f32_word1:
+; GCN: v_cvt_pk_fp8_f32 v2, v0, v1 op_sel:[0,0,1]
+; GCN: v_mov_b32_e32 v0, v2
+define i32 @test_cvt_pk_fp8_f32_word1(float %x, float %y, i32 %old) {
+  %ret = tail call i32 @llvm.amdgcn.cvt.pk.fp8.f32(float %x, float %y, i32 %old, i1 true)
+  ret i32 %ret
+}
+
+; GCN-LABEL: {{^}}test_cvt_sr_bf8_f32_byte0:
+; GCN: v_cvt_sr_bf8_f32 v2, v0, v1{{$}}
+; GCN: v_mov_b32_e32 v0, v2
+define i32 @test_cvt_sr_bf8_f32_byte0(float %x, i32 %r, i32 %old) {
+  %ret = tail call i32 @llvm.amdgcn.cvt.sr.bf8.f32(float %x, i32 %r, i32 %old, i32 0)
+  ret i32 %ret
+}
+
+; GCN-LABEL: {{^}}test_cvt_sr_bf8_f32_byte1:
+; GCN: v_cvt_sr_bf8_f32 v2, v0, v1 op_sel:[0,0,1,0]
+; GCN: v_mov_b32_e32 v0, v2
+define i32 @test_cvt_sr_bf8_f32_byte1(float %x, i32 %r, i32 %old) {
+  %ret = tail call i32 @llvm.amdgcn.cvt.sr.bf8.f32(float %x, i32 %r, i32 %old, i32 1)
+  ret i32 %ret
+}
+
+; GCN-LABEL: {{^}}test_cvt_sr_bf8_f32_byte2:
+; GCN: v_cvt_sr_bf8_f32 v2, v0, v1 op_sel:[0,0,0,1]
+; GCN: v_mov_b32_e32 v0, v2
+define i32 @test_cvt_sr_bf8_f32_byte2(float %x, i32 %r, i32 %old) {
+  %ret = tail call i32 @llvm.amdgcn.cvt.sr.bf8.f32(float %x, i32 %r, i32 %old, i32 2)
+  ret i32 %ret
+}
+
+; GCN-LABEL: {{^}}test_cvt_sr_bf8_f32_byte3:
+; GCN: v_cvt_sr_bf8_f32 v2, v0, v1 op_sel:[0,0,1,1]
+; GCN: v_mov_b32_e32 v0, v2
+define i32 @test_cvt_sr_bf8_f32_byte3(float %x, i32 %r, i32 %old) {
+  %ret = tail call i32 @llvm.amdgcn.cvt.sr.bf8.f32(float %x, i32 %r, i32 %old, i32 3)
+  ret i32 %ret
+}
+
+; GCN-LABEL: {{^}}test_cvt_sr_fp8_f32_byte0:
+; GCN: v_cvt_sr_fp8_f32 v2, v0, v1{{$}}
+; GCN: v_mov_b32_e32 v0, v2
+define i32 @test_cvt_sr_fp8_f32_byte0(float %x, i32 %r, i32 %old) {
+  %ret = tail call i32 @llvm.amdgcn.cvt.sr.fp8.f32(float %x, i32 %r, i32 %old, i32 0)
+  ret i32 %ret
+}
+
+; GCN-LABEL: {{^}}test_cvt_sr_fp8_f32_byte1:
+; GCN: v_cvt_sr_fp8_f32 v2, v0, v1 op_sel:[0,0,1,0]
+; GCN: v_mov_b32_e32 v0, v2
+define i32 @test_cvt_sr_fp8_f32_byte1(float %x, i32 %r, i32 %old) {
+  %ret = tail call i32 @llvm.amdgcn.cvt.sr.fp8.f32(float %x, i32 %r, i32 %old, i32 1)
+  ret i32 %ret
+}
+
+; GCN-LABEL: {{^}}test_cvt_sr_fp8_f32_byte2:
+; GCN: v_cvt_sr_fp8_f32 v2, v0, v1 op_sel:[0,0,0,1]
+; GCN: v_mov_b32_e32 v0, v2
+define i32 @test_cvt_sr_fp8_f32_byte2(float %x, i32 %r, i32 %old) {
+  %ret = tail call i32 @llvm.amdgcn.cvt.sr.fp8.f32(float %x, i32 %r, i32 %old, i32 2)
+  ret i32 %ret
+}
+
+; GCN-LABEL: {{^}}test_cvt_sr_fp8_f32_byte3:
+; GCN: v_cvt_sr_fp8_f32 v2, v0, v1 op_sel:[0,0,1,1]
+; GCN: v_mov_b32_e32 v0, v2
+define i32 @test_cvt_sr_fp8_f32_byte3(float %x, i32 %r, i32 %old) {
+  %ret = tail call i32 @llvm.amdgcn.cvt.sr.fp8.f32(float %x, i32 %r, i32 %old, i32 3)
+  ret i32 %ret
+}
diff --git a/llvm/test/MC/AMDGPU/gfx940_asm_features.s b/llvm/test/MC/AMDGPU/gfx940_asm_features.s
--- a/llvm/test/MC/AMDGPU/gfx940_asm_features.s
+++ b/llvm/test/MC/AMDGPU/gfx940_asm_features.s
@@ -401,3 +401,211 @@
 // GFX10: error: instruction not supported on this GPU
 // GFX940: buffer_atomic_min_f64 v[4:5], off, s[8:11], s3 sc1 ; encoding: [0x00,0x80,0x40,0xe1,0x00,0x04,0x02,0x03]
 buffer_atomic_min_f64 v[4:5], off, s[8:11], s3 sc1
+
+// NOT-GFX940: error: instruction not supported on this GPU
+// GFX940: v_cvt_f32_bf8_e32 v1, s3 ; encoding: [0x03,0xaa,0x02,0x7e]
+v_cvt_f32_bf8 v1, s3
+
+// NOT-GFX940: error: instruction not supported on this GPU
+// GFX940: v_cvt_f32_bf8_e32 v1, 3 ; encoding: [0x83,0xaa,0x02,0x7e]
+v_cvt_f32_bf8 v1, 3
+
+// NOT-GFX940: error: instruction not supported on this GPU
+// GFX940: v_cvt_f32_bf8_e32 v1, v3 ; encoding: [0x03,0xab,0x02,0x7e]
+v_cvt_f32_bf8 v1, v3
+
+// NOT-GFX940: error: instruction not supported on this GPU
+// GFX940: v_cvt_f32_bf8_sdwa v1, s3 src0_sel:BYTE_1 ; encoding: [0xf9,0xaa,0x02,0x7e,0x03,0x06,0x81,0x00]
+v_cvt_f32_bf8 v1, s3 src0_sel:BYTE_1
+
+// NOT-GFX940: error: instruction not supported on this GPU
+// GFX940: v_cvt_f32_bf8_dpp v1, v3 quad_perm:[0,2,1,1] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xaa,0x02,0x7e,0x03,0x58,0x00,0xff]
+v_cvt_f32_bf8 v1, v3 quad_perm:[0,2,1,1] row_mask:0xf bank_mask:0xf
+
+// NOT-GFX940: error: instruction not supported on this GPU
+// GFX940: v_cvt_f32_bf8_e64 v1, s3 mul:2 ; encoding: [0x01,0x00,0x95,0xd1,0x03,0x00,0x00,0x08]
+v_cvt_f32_bf8 v1, s3 mul:2
+
+// NOT-GFX940: error: instruction not supported on this GPU
+// GFX940: v_cvt_f32_bf8_sdwa v1, s3 clamp mul:2 src0_sel:BYTE_1 ; encoding: [0xf9,0xaa,0x02,0x7e,0x03,0x66,0x81,0x00]
+v_cvt_f32_bf8 v1, s3 clamp mul:2 src0_sel:BYTE_1
+
+// NOT-GFX940: error: instruction not supported on this GPU
+// GFX940: v_cvt_f32_bf8_e64 v1, s3 clamp ; encoding: [0x01,0x80,0x95,0xd1,0x03,0x00,0x00,0x00]
+v_cvt_f32_bf8 v1, s3 clamp
+
+// NOT-GFX940: error: instruction not supported on this GPU
+// GFX940: v_cvt_f32_fp8_e32 v1, s3 ; encoding: [0x03,0xa8,0x02,0x7e]
+v_cvt_f32_fp8 v1, s3
+
+// NOT-GFX940: error: instruction not supported on this GPU
+// GFX940: v_cvt_f32_fp8_e32 v1, 3 ; encoding: [0x83,0xa8,0x02,0x7e]
+v_cvt_f32_fp8 v1, 3
+
+// NOT-GFX940: error: instruction not supported on this GPU
+// GFX940: v_cvt_f32_fp8_e32 v1, v3 ; encoding: [0x03,0xa9,0x02,0x7e]
+v_cvt_f32_fp8 v1, v3
+
+// NOT-GFX940: error: instruction not supported on this GPU
+// GFX940: v_cvt_f32_fp8_sdwa v1, s3 src0_sel:BYTE_1 ; encoding: [0xf9,0xa8,0x02,0x7e,0x03,0x06,0x81,0x00]
+v_cvt_f32_fp8 v1, s3 src0_sel:BYTE_1
+
+// NOT-GFX940: error: instruction not supported on this GPU
+// GFX940: v_cvt_f32_fp8_dpp v1, v3 quad_perm:[0,2,1,1] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xa8,0x02,0x7e,0x03,0x58,0x00,0xff]
+v_cvt_f32_fp8 v1, v3 quad_perm:[0,2,1,1] row_mask:0xf bank_mask:0xf
+
+// NOT-GFX940: error: instruction not supported on this GPU
+// GFX940: v_cvt_f32_fp8_e64 v1, s3 mul:2 ; encoding: [0x01,0x00,0x94,0xd1,0x03,0x00,0x00,0x08]
+v_cvt_f32_fp8 v1, s3 mul:2
+
+// NOT-GFX940: error: instruction not supported on this GPU
+// GFX940: v_cvt_f32_fp8_sdwa v1, s3 clamp mul:2 src0_sel:BYTE_1 ; encoding: [0xf9,0xa8,0x02,0x7e,0x03,0x66,0x81,0x00]
+v_cvt_f32_fp8 v1, s3 clamp mul:2 src0_sel:BYTE_1
+
+// NOT-GFX940: error: instruction not supported on this GPU
+// GFX940: v_cvt_f32_fp8_e64 v1, s3 clamp ; encoding: [0x01,0x80,0x94,0xd1,0x03,0x00,0x00,0x00]
+v_cvt_f32_fp8 v1, s3 clamp
+
+// NOT-GFX940: error: instruction not supported on this GPU
+// GFX940: v_cvt_f32_fp8_sdwa v1, 3 src0_sel:BYTE_1 ; encoding: [0xf9,0xa8,0x02,0x7e,0x83,0x06,0x81,0x00]
+v_cvt_f32_fp8 v1, 3 src0_sel:BYTE_1
+
+// NOT-GFX940: error: instruction not supported on this GPU
+// GFX940: v_cvt_pk_f32_bf8_e32 v[2:3], s3 ; encoding: [0x03,0xae,0x04,0x7e]
+v_cvt_pk_f32_bf8 v[2:3], s3
+
+// NOT-GFX940: error: instruction not supported on this GPU
+// GFX940: v_cvt_pk_f32_bf8_e32 v[2:3], 3 ; encoding: [0x83,0xae,0x04,0x7e]
+v_cvt_pk_f32_bf8 v[2:3], 3
+
+// NOT-GFX940: error: instruction not supported on this GPU
+// GFX940: v_cvt_pk_f32_bf8_e32 v[2:3], v3 ; encoding: [0x03,0xaf,0x04,0x7e]
+v_cvt_pk_f32_bf8 v[2:3], v3
+
+// NOT-GFX940: error: instruction not supported on this GPU
+// GFX940: v_cvt_pk_f32_bf8_sdwa v[2:3], s3 src0_sel:WORD_1 ; encoding: [0xf9,0xae,0x04,0x7e,0x03,0x06,0x85,0x00]
+v_cvt_pk_f32_bf8 v[2:3], s3 src0_sel:WORD_1
+
+// NOT-GFX940: error: instruction not supported on this GPU
+// GFX940: v_cvt_pk_f32_bf8_dpp v[0:1], v3 quad_perm:[0,2,1,1] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xae,0x00,0x7e,0x03,0x58,0x00,0xff]
+v_cvt_pk_f32_bf8 v[0:1], v3 quad_perm:[0,2,1,1] row_mask:0xf bank_mask:0xf
+
+// NOT-GFX940: error: instruction not supported on this GPU
+// GFX940: v_cvt_pk_f32_bf8_e64 v[2:3], s3 mul:2 ; encoding: [0x02,0x00,0x97,0xd1,0x03,0x00,0x00,0x08]
+v_cvt_pk_f32_bf8 v[2:3], s3 mul:2
+
+// NOT-GFX940: error: instruction not supported on this GPU
+// GFX940: v_cvt_pk_f32_bf8_sdwa v[2:3], s3 clamp mul:2 src0_sel:WORD_1 ; encoding: [0xf9,0xae,0x04,0x7e,0x03,0x66,0x85,0x00]
+v_cvt_pk_f32_bf8 v[2:3], s3 clamp mul:2 src0_sel:WORD_1
+
+// NOT-GFX940: error: instruction not supported on this GPU
+// GFX940: v_cvt_pk_f32_bf8_e64 v[2:3], s3 clamp ; encoding: [0x02,0x80,0x97,0xd1,0x03,0x00,0x00,0x00]
+v_cvt_pk_f32_bf8 v[2:3], s3 clamp
+
+// NOT-GFX940: error: instruction not supported on this GPU
+// GFX940: v_cvt_pk_f32_fp8_e32 v[2:3], s3 ; encoding: [0x03,0xac,0x04,0x7e]
+v_cvt_pk_f32_fp8 v[2:3], s3
+
+// NOT-GFX940: error: instruction not supported on this GPU
+// GFX940: v_cvt_pk_f32_fp8_e32 v[2:3], 3 ; encoding: [0x83,0xac,0x04,0x7e]
+v_cvt_pk_f32_fp8 v[2:3], 3
+
+// NOT-GFX940: error: instruction not supported on this GPU
+// GFX940: v_cvt_pk_f32_fp8_e32 v[2:3], v3 ; encoding: [0x03,0xad,0x04,0x7e]
+v_cvt_pk_f32_fp8 v[2:3], v3
+
+// NOT-GFX940: error: instruction not supported on this GPU
+// GFX940: v_cvt_pk_f32_fp8_sdwa v[2:3], s3 src0_sel:WORD_1 ; encoding: [0xf9,0xac,0x04,0x7e,0x03,0x06,0x85,0x00]
+v_cvt_pk_f32_fp8 v[2:3], s3 src0_sel:WORD_1
+
+// NOT-GFX940: error: instruction not supported on this GPU
+// GFX940: v_cvt_pk_f32_fp8_sdwa v[2:3], 3 src0_sel:WORD_1 ; encoding: [0xf9,0xac,0x04,0x7e,0x83,0x06,0x85,0x00]
+v_cvt_pk_f32_fp8 v[2:3], 3 src0_sel:WORD_1
+
+// NOT-GFX940: error: instruction not supported on this GPU
+// GFX940: v_cvt_pk_f32_fp8_dpp v[0:1], v3 quad_perm:[0,2,1,1] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xac,0x00,0x7e,0x03,0x58,0x00,0xff]
+v_cvt_pk_f32_fp8 v[0:1], v3 quad_perm:[0,2,1,1] row_mask:0xf bank_mask:0xf
+
+// NOT-GFX940: error: instruction not supported on this GPU
+// GFX940: v_cvt_pk_f32_fp8_e64 v[2:3], s3 mul:2 ; encoding: [0x02,0x00,0x96,0xd1,0x03,0x00,0x00,0x08]
+v_cvt_pk_f32_fp8 v[2:3], s3 mul:2
+
+// 
NOT-GFX940: error: instruction not supported on this GPU +// GFX940: v_cvt_pk_f32_fp8_sdwa v[2:3], s3 clamp mul:2 src0_sel:WORD_1 ; encoding: [0xf9,0xac,0x04,0x7e,0x03,0x66,0x85,0x00] +v_cvt_pk_f32_fp8 v[2:3], s3 clamp mul:2 src0_sel:WORD_1 + +// NOT-GFX940: error: instruction not supported on this GPU +// GFX940: v_cvt_pk_f32_fp8_e64 v[2:3], s3 clamp ; encoding: [0x02,0x80,0x96,0xd1,0x03,0x00,0x00,0x00] +v_cvt_pk_f32_fp8 v[2:3], s3 clamp + +// NOT-GFX940: error: instruction not supported on this GPU +// GFX940: v_cvt_pk_bf8_f32 v1, v2, v3 ; encoding: [0x01,0x00,0xa3,0xd2,0x02,0x07,0x02,0x00] +v_cvt_pk_bf8_f32 v1, v2, v3 + +// NOT-GFX940: error: instruction not supported on this GPU +// GFX940: v_cvt_pk_bf8_f32 v1, -v2, |v3| ; encoding: [0x01,0x02,0xa3,0xd2,0x02,0x07,0x02,0x20] +v_cvt_pk_bf8_f32 v1, -v2, |v3| + +// NOT-GFX940: error: instruction not supported on this GPU +// GFX940: v_cvt_pk_bf8_f32 v1, s2, 3 ; encoding: [0x01,0x00,0xa3,0xd2,0x02,0x06,0x01,0x00] +v_cvt_pk_bf8_f32 v1, s2, 3 + +// NOT-GFX940: error: instruction not supported on this GPU +// GFX940: v_cvt_pk_bf8_f32 v1, v2, v3 op_sel:[0,0,1] ; encoding: [0x01,0x40,0xa3,0xd2,0x02,0x07,0x02,0x00] +v_cvt_pk_bf8_f32 v1, v2, v3 op_sel:[0,0,1] + +// NOT-GFX940: error: instruction not supported on this GPU +// GFX940: v_cvt_pk_fp8_f32 v1, v2, v3 ; encoding: [0x01,0x00,0xa2,0xd2,0x02,0x07,0x02,0x00] +v_cvt_pk_fp8_f32 v1, v2, v3 + +// NOT-GFX940: error: instruction not supported on this GPU +// GFX940: v_cvt_pk_fp8_f32 v1, -v2, |v3| ; encoding: [0x01,0x02,0xa2,0xd2,0x02,0x07,0x02,0x20] +v_cvt_pk_fp8_f32 v1, -v2, |v3| + +// NOT-GFX940: error: instruction not supported on this GPU +// GFX940: v_cvt_pk_fp8_f32 v1, s2, 3 ; encoding: [0x01,0x00,0xa2,0xd2,0x02,0x06,0x01,0x00] +v_cvt_pk_fp8_f32 v1, s2, 3 + +// NOT-GFX940: error: instruction not supported on this GPU +// GFX940: v_cvt_pk_fp8_f32 v1, v2, v3 op_sel:[0,0,1] ; encoding: [0x01,0x40,0xa2,0xd2,0x02,0x07,0x02,0x00] +v_cvt_pk_fp8_f32 v1, v2, v3 op_sel:[0,0,1] 
+ +// NOT-GFX940: error: instruction not supported on this GPU +// GFX940: v_cvt_sr_bf8_f32 v1, v2, v3 ; encoding: [0x01,0x00,0xa5,0xd2,0x02,0x07,0x02,0x00] +v_cvt_sr_bf8_f32 v1, v2, v3 + +// NOT-GFX940: error: instruction not supported on this GPU +// GFX940: v_cvt_sr_bf8_f32 v1, s2, 3 ; encoding: [0x01,0x00,0xa5,0xd2,0x02,0x06,0x01,0x00] +v_cvt_sr_bf8_f32 v1, s2, 3 + +// NOT-GFX940: error: instruction not supported on this GPU +// GFX940: v_cvt_sr_bf8_f32 v1, v2, v3 op_sel:[0,0,1,1] ; encoding: [0x01,0x60,0xa5,0xd2,0x02,0x07,0x02,0x00] +v_cvt_sr_bf8_f32 v1, v2, v3 op_sel:[0,0,1,1] + +// NOT-GFX940: error: instruction not supported on this GPU +// GFX940: v_cvt_sr_bf8_f32 v1, v2, v3 op_sel:[0,0,0,1] ; encoding: [0x01,0x40,0xa5,0xd2,0x02,0x07,0x02,0x00] +v_cvt_sr_bf8_f32 v1, v2, v3 op_sel:[0,0,0,1] + +// NOT-GFX940: error: instruction not supported on this GPU +// GFX940: v_cvt_sr_bf8_f32 v1, -|s2|, v3 ; encoding: [0x01,0x01,0xa5,0xd2,0x02,0x06,0x02,0x20] +v_cvt_sr_bf8_f32 v1, -|s2|, v3 + +// NOT-GFX940: error: instruction not supported on this GPU +// GFX940: v_cvt_sr_fp8_f32 v1, v2, v3 ; encoding: [0x01,0x00,0xa4,0xd2,0x02,0x07,0x02,0x00] +v_cvt_sr_fp8_f32 v1, v2, v3 + +// NOT-GFX940: error: instruction not supported on this GPU +// GFX940: v_cvt_sr_fp8_f32 v1, s2, 3 ; encoding: [0x01,0x00,0xa4,0xd2,0x02,0x06,0x01,0x00] +v_cvt_sr_fp8_f32 v1, s2, 3 + +// NOT-GFX940: error: instruction not supported on this GPU +// GFX940: v_cvt_sr_fp8_f32 v1, v2, v3 op_sel:[0,0,1,1] ; encoding: [0x01,0x60,0xa4,0xd2,0x02,0x07,0x02,0x00] +v_cvt_sr_fp8_f32 v1, v2, v3 op_sel:[0,0,1,1] + +// NOT-GFX940: error: instruction not supported on this GPU +// GFX940: v_cvt_sr_fp8_f32 v1, v2, v3 op_sel:[0,0,0,1] ; encoding: [0x01,0x40,0xa4,0xd2,0x02,0x07,0x02,0x00] +v_cvt_sr_fp8_f32 v1, v2, v3 op_sel:[0,0,0,1] + +// NOT-GFX940: error: instruction not supported on this GPU +// GFX940: v_cvt_sr_fp8_f32 v1, -|s2|, v3 ; encoding: [0x01,0x01,0xa4,0xd2,0x02,0x06,0x02,0x20] +v_cvt_sr_fp8_f32 v1, 
-|s2|, v3 diff --git a/llvm/test/MC/AMDGPU/gfx940_err.s b/llvm/test/MC/AMDGPU/gfx940_err.s --- a/llvm/test/MC/AMDGPU/gfx940_err.s +++ b/llvm/test/MC/AMDGPU/gfx940_err.s @@ -72,6 +72,30 @@ v_dot2_u32_u16 v0, 1, v0, s2 op_sel:[0,1,0,1] op_sel_hi:[0,0,1,1] // GFX940: error: invalid op_sel operand +v_cvt_f32_fp8 v1, sext(v3) src0_sel:BYTE_1 +// GFX940: error: not a valid operand. + +v_cvt_pk_f32_bf8 v[2:3], sext(v3) src0_sel:BYTE_1 +// GFX940: error: not a valid operand. + +v_cvt_sr_bf8_f32 v1, v2, -v3 +// GFX940: error: not a valid operand. + +v_cvt_sr_fp8_f32 v1, v2, -v3 +// GFX940: error: not a valid operand. + +v_cvt_sr_fp8_f32 v1, v2, v3 clamp +// GFX940: error: invalid operand for instruction + +v_cvt_sr_fp8_f32 v1, v2, v3 mul:2 +// GFX940: error: invalid operand for instruction + +v_cvt_pk_fp8_f32 v1, v2, v3 clamp +// GFX940: error: invalid operand for instruction + +v_cvt_pk_fp8_f32 v1, v2, v3 mul:2 +// GFX940: error: invalid operand for instruction + s_getreg_b32 s1, hwreg(HW_REG_FLAT_SCR_LO) // GFX940: error: specified hardware register is not supported on this GPU diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx940_dasm_features.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx940_dasm_features.txt --- a/llvm/test/MC/Disassembler/AMDGPU/gfx940_dasm_features.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx940_dasm_features.txt @@ -263,3 +263,159 @@ # GFX940: buffer_atomic_min_f64 v[4:5], off, s[8:11], s3 sc1 ; encoding: [0x00,0x80,0x40,0xe1,0x00,0x04,0x02,0x03] 0x00,0x80,0x40,0xe1,0x00,0x04,0x02,0x03 + +# GFX940: v_cvt_f32_bf8_e32 v1, s3 ; encoding: [0x03,0xaa,0x02,0x7e] +0x03,0xaa,0x02,0x7e + +# GFX940: v_cvt_f32_bf8_e32 v1, 3 ; encoding: [0x83,0xaa,0x02,0x7e] +0x83,0xaa,0x02,0x7e + +# GFX940: v_cvt_f32_bf8_e32 v1, v3 ; encoding: [0x03,0xab,0x02,0x7e] +0x03,0xab,0x02,0x7e + +# GFX940: v_cvt_f32_bf8_sdwa v1, s3 src0_sel:BYTE_1 ; encoding: [0xf9,0xaa,0x02,0x7e,0x03,0x06,0x81,0x00] +0xf9,0xaa,0x02,0x7e,0x03,0x06,0x81,0x00 + +# GFX940: v_cvt_f32_bf8_dpp v1, v3 
quad_perm:[0,2,1,1] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xaa,0x02,0x7e,0x03,0x58,0x00,0xff] +0xfa,0xaa,0x02,0x7e,0x03,0x58,0x00,0xff + +# GFX940: v_cvt_f32_bf8_e64 v1, s3 mul:2 ; encoding: [0x01,0x00,0x95,0xd1,0x03,0x00,0x00,0x08] +0x01,0x00,0x95,0xd1,0x03,0x00,0x00,0x08 + +# GFX940: v_cvt_f32_bf8_sdwa v1, s3 clamp mul:2 src0_sel:BYTE_1 ; encoding: [0xf9,0xaa,0x02,0x7e,0x03,0x66,0x81,0x00] +0xf9,0xaa,0x02,0x7e,0x03,0x66,0x81,0x00 + +# GFX940: v_cvt_f32_bf8_e64 v1, s3 clamp ; encoding: [0x01,0x80,0x95,0xd1,0x03,0x00,0x00,0x00] +0x01,0x80,0x95,0xd1,0x03,0x00,0x00,0x00 + +# GFX940: v_cvt_f32_fp8_e32 v1, s3 ; encoding: [0x03,0xa8,0x02,0x7e] +0x03,0xa8,0x02,0x7e + +# GFX940: v_cvt_f32_fp8_e32 v1, 3 ; encoding: [0x83,0xa8,0x02,0x7e] +0x83,0xa8,0x02,0x7e + +# GFX940: v_cvt_f32_fp8_e32 v1, v3 ; encoding: [0x03,0xa9,0x02,0x7e] +0x03,0xa9,0x02,0x7e + +# GFX940: v_cvt_f32_fp8_sdwa v1, s3 src0_sel:BYTE_1 ; encoding: [0xf9,0xa8,0x02,0x7e,0x03,0x06,0x81,0x00] +0xf9,0xa8,0x02,0x7e,0x03,0x06,0x81,0x00 + +# GFX940: v_cvt_f32_fp8_dpp v1, v3 quad_perm:[0,2,1,1] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xa8,0x02,0x7e,0x03,0x58,0x00,0xff] +0xfa,0xa8,0x02,0x7e,0x03,0x58,0x00,0xff + +# GFX940: v_cvt_f32_fp8_e64 v1, s3 mul:2 ; encoding: [0x01,0x00,0x94,0xd1,0x03,0x00,0x00,0x08] +0x01,0x00,0x94,0xd1,0x03,0x00,0x00,0x08 + +# GFX940: v_cvt_f32_fp8_sdwa v1, s3 clamp mul:2 src0_sel:BYTE_1 ; encoding: [0xf9,0xa8,0x02,0x7e,0x03,0x66,0x81,0x00] +0xf9,0xa8,0x02,0x7e,0x03,0x66,0x81,0x00 + +# GFX940: v_cvt_f32_fp8_e64 v1, s3 clamp ; encoding: [0x01,0x80,0x94,0xd1,0x03,0x00,0x00,0x00] +0x01,0x80,0x94,0xd1,0x03,0x00,0x00,0x00 + +# GFX940: v_cvt_f32_fp8_sdwa v1, 3 src0_sel:BYTE_1 ; encoding: [0xf9,0xa8,0x02,0x7e,0x83,0x06,0x81,0x00] +0xf9,0xa8,0x02,0x7e,0x83,0x06,0x81,0x00 + +# GFX940: v_cvt_pk_f32_bf8_e32 v[2:3], s3 ; encoding: [0x03,0xae,0x04,0x7e] +0x03,0xae,0x04,0x7e + +# GFX940: v_cvt_pk_f32_bf8_e32 v[2:3], 3 ; encoding: [0x83,0xae,0x04,0x7e] +0x83,0xae,0x04,0x7e + +# GFX940: 
v_cvt_pk_f32_bf8_e32 v[2:3], v3 ; encoding: [0x03,0xaf,0x04,0x7e] +0x03,0xaf,0x04,0x7e + +# GFX940: v_cvt_pk_f32_bf8_sdwa v[2:3], s3 src0_sel:WORD_1 ; encoding: [0xf9,0xae,0x04,0x7e,0x03,0x06,0x85,0x00] +0xf9,0xae,0x04,0x7e,0x03,0x06,0x85,0x00 + +# GFX940: v_cvt_pk_f32_bf8_dpp v[0:1], v3 quad_perm:[0,2,1,1] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xae,0x00,0x7e,0x03,0x58,0x00,0xff] +0xfa,0xae,0x00,0x7e,0x03,0x58,0x00,0xff + +# GFX940: v_cvt_pk_f32_bf8_e64 v[2:3], s3 mul:2 ; encoding: [0x02,0x00,0x97,0xd1,0x03,0x00,0x00,0x08] +0x02,0x00,0x97,0xd1,0x03,0x00,0x00,0x08 + +# GFX940: v_cvt_pk_f32_bf8_sdwa v[2:3], s3 clamp mul:2 src0_sel:WORD_1 ; encoding: [0xf9,0xae,0x04,0x7e,0x03,0x66,0x85,0x00] +0xf9,0xae,0x04,0x7e,0x03,0x66,0x85,0x00 + +# GFX940: v_cvt_pk_f32_bf8_e64 v[2:3], s3 clamp ; encoding: [0x02,0x80,0x97,0xd1,0x03,0x00,0x00,0x00] +0x02,0x80,0x97,0xd1,0x03,0x00,0x00,0x00 + +# GFX940: v_cvt_pk_f32_fp8_e32 v[2:3], s3 ; encoding: [0x03,0xac,0x04,0x7e] +0x03,0xac,0x04,0x7e + +# GFX940: v_cvt_pk_f32_fp8_e32 v[2:3], 3 ; encoding: [0x83,0xac,0x04,0x7e] +0x83,0xac,0x04,0x7e + +# GFX940: v_cvt_pk_f32_fp8_e32 v[2:3], v3 ; encoding: [0x03,0xad,0x04,0x7e] +0x03,0xad,0x04,0x7e + +# GFX940: v_cvt_pk_f32_fp8_sdwa v[2:3], s3 src0_sel:WORD_1 ; encoding: [0xf9,0xac,0x04,0x7e,0x03,0x06,0x85,0x00] +0xf9,0xac,0x04,0x7e,0x03,0x06,0x85,0x00 + +# GFX940: v_cvt_pk_f32_fp8_sdwa v[2:3], 3 src0_sel:WORD_1 ; encoding: [0xf9,0xac,0x04,0x7e,0x83,0x06,0x85,0x00] +0xf9,0xac,0x04,0x7e,0x83,0x06,0x85,0x00 + +# GFX940: v_cvt_pk_f32_fp8_dpp v[0:1], v3 quad_perm:[0,2,1,1] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xac,0x00,0x7e,0x03,0x58,0x00,0xff] +0xfa,0xac,0x00,0x7e,0x03,0x58,0x00,0xff + +# GFX940: v_cvt_pk_f32_fp8_e64 v[2:3], s3 mul:2 ; encoding: [0x02,0x00,0x96,0xd1,0x03,0x00,0x00,0x08] +0x02,0x00,0x96,0xd1,0x03,0x00,0x00,0x08 + +# GFX940: v_cvt_pk_f32_fp8_sdwa v[2:3], s3 clamp mul:2 src0_sel:WORD_1 ; encoding: [0xf9,0xac,0x04,0x7e,0x03,0x66,0x85,0x00] 
+0xf9,0xac,0x04,0x7e,0x03,0x66,0x85,0x00 + +# GFX940: v_cvt_pk_f32_fp8_e64 v[2:3], s3 clamp ; encoding: [0x02,0x80,0x96,0xd1,0x03,0x00,0x00,0x00] +0x02,0x80,0x96,0xd1,0x03,0x00,0x00,0x00 + +# GFX940: v_cvt_pk_bf8_f32 v1, v2, v3 ; encoding: [0x01,0x00,0xa3,0xd2,0x02,0x07,0x02,0x00] +0x01,0x00,0xa3,0xd2,0x02,0x07,0x02,0x00 + +# GFX940: v_cvt_pk_bf8_f32 v1, -v2, |v3| ; encoding: [0x01,0x02,0xa3,0xd2,0x02,0x07,0x02,0x20] +0x01,0x02,0xa3,0xd2,0x02,0x07,0x02,0x20 + +# GFX940: v_cvt_pk_bf8_f32 v1, s2, 3 ; encoding: [0x01,0x00,0xa3,0xd2,0x02,0x06,0x01,0x00] +0x01,0x00,0xa3,0xd2,0x02,0x06,0x01,0x00 + +# GFX940: v_cvt_pk_bf8_f32 v1, v2, v3 op_sel:[0,0,1] ; encoding: [0x01,0x40,0xa3,0xd2,0x02,0x07,0x02,0x00] +0x01,0x40,0xa3,0xd2,0x02,0x07,0x02,0x00 + +# GFX940: v_cvt_pk_fp8_f32 v1, v2, v3 ; encoding: [0x01,0x00,0xa2,0xd2,0x02,0x07,0x02,0x00] +0x01,0x00,0xa2,0xd2,0x02,0x07,0x02,0x00 + +# GFX940: v_cvt_pk_fp8_f32 v1, -v2, |v3| ; encoding: [0x01,0x02,0xa2,0xd2,0x02,0x07,0x02,0x20] +0x01,0x02,0xa2,0xd2,0x02,0x07,0x02,0x20 + +# GFX940: v_cvt_pk_fp8_f32 v1, s2, 3 ; encoding: [0x01,0x00,0xa2,0xd2,0x02,0x06,0x01,0x00] +0x01,0x00,0xa2,0xd2,0x02,0x06,0x01,0x00 + +# GFX940: v_cvt_pk_fp8_f32 v1, v2, v3 op_sel:[0,0,1] ; encoding: [0x01,0x40,0xa2,0xd2,0x02,0x07,0x02,0x00] +0x01,0x40,0xa2,0xd2,0x02,0x07,0x02,0x00 + +# GFX940: v_cvt_sr_bf8_f32 v1, v2, v3 ; encoding: [0x01,0x00,0xa5,0xd2,0x02,0x07,0x02,0x00] +0x01,0x00,0xa5,0xd2,0x02,0x07,0x02,0x00 + +# GFX940: v_cvt_sr_bf8_f32 v1, s2, 3 ; encoding: [0x01,0x00,0xa5,0xd2,0x02,0x06,0x01,0x00] +0x01,0x00,0xa5,0xd2,0x02,0x06,0x01,0x00 + +# GFX940: v_cvt_sr_bf8_f32 v1, v2, v3 op_sel:[0,0,1,1] ; encoding: [0x01,0x60,0xa5,0xd2,0x02,0x07,0x02,0x00] +0x01,0x60,0xa5,0xd2,0x02,0x07,0x02,0x00 + +# GFX940: v_cvt_sr_bf8_f32 v1, v2, v3 op_sel:[0,0,0,1] ; encoding: [0x01,0x40,0xa5,0xd2,0x02,0x07,0x02,0x00] +0x01,0x40,0xa5,0xd2,0x02,0x07,0x02,0x00 + +# GFX940: v_cvt_sr_bf8_f32 v1, -|s2|, v3 ; encoding: [0x01,0x01,0xa5,0xd2,0x02,0x06,0x02,0x20] 
+0x01,0x01,0xa5,0xd2,0x02,0x06,0x02,0x20 + +# GFX940: v_cvt_sr_fp8_f32 v1, v2, v3 ; encoding: [0x01,0x00,0xa4,0xd2,0x02,0x07,0x02,0x00] +0x01,0x00,0xa4,0xd2,0x02,0x07,0x02,0x00 + +# GFX940: v_cvt_sr_fp8_f32 v1, s2, 3 ; encoding: [0x01,0x00,0xa4,0xd2,0x02,0x06,0x01,0x00] +0x01,0x00,0xa4,0xd2,0x02,0x06,0x01,0x00 + +# GFX940: v_cvt_sr_fp8_f32 v1, v2, v3 op_sel:[0,0,1,1] ; encoding: [0x01,0x60,0xa4,0xd2,0x02,0x07,0x02,0x00] +0x01,0x60,0xa4,0xd2,0x02,0x07,0x02,0x00 + +# GFX940: v_cvt_sr_fp8_f32 v1, v2, v3 op_sel:[0,0,0,1] ; encoding: [0x01,0x40,0xa4,0xd2,0x02,0x07,0x02,0x00] +0x01,0x40,0xa4,0xd2,0x02,0x07,0x02,0x00 + +# GFX940: v_cvt_sr_fp8_f32 v1, -|s2|, v3 ; encoding: [0x01,0x01,0xa4,0xd2,0x02,0x06,0x02,0x20] +0x01,0x01,0xa4,0xd2,0x02,0x06,0x02,0x20