Index: include/llvm/BinaryFormat/ELF.h =================================================================== --- include/llvm/BinaryFormat/ELF.h +++ include/llvm/BinaryFormat/ELF.h @@ -705,6 +705,8 @@ EF_AMDGPU_MACH_AMDGCN_GFX909 = 0x031, // AMDGCN GFX10. EF_AMDGPU_MACH_AMDGCN_GFX1010 = 0x033, + EF_AMDGPU_MACH_AMDGCN_GFX1011 = 0x034, + EF_AMDGPU_MACH_AMDGCN_GFX1012 = 0x035, // Reserved for AMDGCN-based processors. EF_AMDGPU_MACH_AMDGCN_RESERVED0 = 0x027, @@ -713,7 +715,7 @@ // First/last AMDGCN-based processors. EF_AMDGPU_MACH_AMDGCN_FIRST = EF_AMDGPU_MACH_AMDGCN_GFX600, - EF_AMDGPU_MACH_AMDGCN_LAST = EF_AMDGPU_MACH_AMDGCN_GFX1010, + EF_AMDGPU_MACH_AMDGCN_LAST = EF_AMDGPU_MACH_AMDGCN_GFX1012, // Indicates if the "xnack" target feature is enabled for all code contained // in the object. Index: include/llvm/Support/TargetParser.h =================================================================== --- include/llvm/Support/TargetParser.h +++ include/llvm/Support/TargetParser.h @@ -124,9 +124,11 @@ GK_GFX909 = 65, GK_GFX1010 = 71, + GK_GFX1011 = 72, + GK_GFX1012 = 73, GK_AMDGCN_FIRST = GK_GFX600, - GK_AMDGCN_LAST = GK_GFX1010, + GK_AMDGCN_LAST = GK_GFX1012, }; /// Instruction set architecture version. Index: lib/ObjectYAML/ELFYAML.cpp =================================================================== --- lib/ObjectYAML/ELFYAML.cpp +++ lib/ObjectYAML/ELFYAML.cpp @@ -412,6 +412,8 @@ BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX906, EF_AMDGPU_MACH); BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX909, EF_AMDGPU_MACH); BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX1010, EF_AMDGPU_MACH); + BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX1011, EF_AMDGPU_MACH); + BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX1012, EF_AMDGPU_MACH); BCase(EF_AMDGPU_XNACK); BCase(EF_AMDGPU_SRAM_ECC); break; Index: lib/Support/TargetParser.cpp =================================================================== --- lib/Support/TargetParser.cpp +++ lib/Support/TargetParser.cpp @@ -62,7 +62,7 @@ // This table should be sorted by the value of GPUKind // Don't bother listing the implicitly true features -constexpr GPUInfo AMDGCNGPUs[34] = { +constexpr GPUInfo AMDGCNGPUs[36] = { // Name Canonical Kind Features // Name {{"gfx600"}, {"gfx600"}, GK_GFX600, FEATURE_FAST_FMA_F32}, @@ -99,6 +99,8 @@ {{"gfx906"}, {"gfx906"}, GK_GFX906, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32}, {{"gfx909"}, {"gfx909"}, GK_GFX909, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32}, {{"gfx1010"}, {"gfx1010"}, GK_GFX1010, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32}, + {{"gfx1011"}, {"gfx1011"}, GK_GFX1011, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32}, + {{"gfx1012"}, {"gfx1012"}, GK_GFX1012, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32}, }; const GPUInfo *getArchEntry(AMDGPU::GPUKind AK, ArrayRef Table) { @@ -197,6 +199,8 @@ case GK_GFX906: return {9, 0, 6}; case GK_GFX909: return {9, 0, 9}; case GK_GFX1010: return {10, 1, 0}; + case GK_GFX1011: return {10, 1, 1}; + case GK_GFX1012: return {10, 1, 2}; default: return {0, 0, 0}; } } Index: lib/Target/AMDGPU/AMDGPU.td =================================================================== --- lib/Target/AMDGPU/AMDGPU.td +++ lib/Target/AMDGPU/AMDGPU.td @@ -378,6 +378,18 @@ "Has v_dot2_f32_f16, v_dot2_i32_i16, v_dot2_u32_u16, v_dot4_u32_u8, v_dot8_u32_u4 instructions" >; +def FeatureDot5Insts : SubtargetFeature<"dot5-insts", + "HasDot5Insts", + "true", + "Has v_dot2c_f32_f16 instruction" +>; + +def FeatureDot6Insts : SubtargetFeature<"dot6-insts", + "HasDot6Insts", + "true", + "Has v_dot4c_i32_i8 instruction" +>; + def FeatureDoesNotSupportSRAMECC : SubtargetFeature<"no-sram-ecc-support", "DoesNotSupportSRAMECC", "true", @@ -773,6 +785,41 @@ FeatureDoesNotSupportXNACK, FeatureCodeObjectV3])>; +def FeatureISAVersion10_1_1 : FeatureSet< + !listconcat(FeatureGroup.GFX10_1_Bugs, + [FeatureGFX10, + FeatureLDSBankCount32, + FeatureDLInsts, + FeatureDot1Insts, + FeatureDot2Insts, + FeatureDot5Insts, + FeatureDot6Insts, + FeatureNSAEncoding, + FeatureWavefrontSize64, + FeatureScalarStores, + FeatureScalarAtomics, + FeatureScalarFlatScratchInsts, + FeatureDoesNotSupportXNACK, + FeatureCodeObjectV3])>; + +def FeatureISAVersion10_1_2 : FeatureSet< + !listconcat(FeatureGroup.GFX10_1_Bugs, + [FeatureGFX10, + FeatureLDSBankCount32, + FeatureDLInsts, + FeatureDot1Insts, + FeatureDot2Insts, + FeatureDot5Insts, + FeatureDot6Insts, + FeatureNSAEncoding, + FeatureWavefrontSize64, + FeatureScalarStores, + FeatureScalarAtomics, + FeatureScalarFlatScratchInsts, + FeatureLdsMisalignedBug, + FeatureDoesNotSupportXNACK, + FeatureCodeObjectV3])>; + //===----------------------------------------------------------------------===// def AMDGPUInstrInfo : InstrInfo { @@ -1015,6 +1062,11 @@ def HasDot2Insts : Predicate<"Subtarget->hasDot2Insts()">, AssemblerPredicate<"FeatureDot2Insts">; +def HasDot5Insts : Predicate<"Subtarget->hasDot5Insts()">, + AssemblerPredicate<"FeatureDot5Insts">; + +def HasDot6Insts : Predicate<"Subtarget->hasDot6Insts()">, + AssemblerPredicate<"FeatureDot6Insts">; def EnableLateCFGStructurize : Predicate< "EnableLateStructurizeCFG">; Index: lib/Target/AMDGPU/AMDGPUSubtarget.h =================================================================== --- lib/Target/AMDGPU/AMDGPUSubtarget.h +++ lib/Target/AMDGPU/AMDGPUSubtarget.h @@ -337,6 +337,8 @@ bool HasDLInsts; bool HasDot1Insts; bool HasDot2Insts; + bool HasDot5Insts; + bool HasDot6Insts; bool EnableSRAMECC; bool DoesNotSupportSRAMECC; bool HasNoSdstCMPX; @@ -705,6 +707,14 @@ return HasDot2Insts; } + bool hasDot5Insts() const { + return HasDot5Insts; + } + + bool hasDot6Insts() const { + return HasDot6Insts; + } + bool isSRAMECCEnabled() const { return EnableSRAMECC; } Index: lib/Target/AMDGPU/AMDGPUSubtarget.cpp =================================================================== --- lib/Target/AMDGPU/AMDGPUSubtarget.cpp +++ lib/Target/AMDGPU/AMDGPUSubtarget.cpp @@ -234,6 +234,8 @@ HasDLInsts(false), HasDot1Insts(false), HasDot2Insts(false), + HasDot5Insts(false), + HasDot6Insts(false), EnableSRAMECC(false), DoesNotSupportSRAMECC(false), HasNoSdstCMPX(false), Index: lib/Target/AMDGPU/GCNProcessors.td =================================================================== --- lib/Target/AMDGPU/GCNProcessors.td +++ lib/Target/AMDGPU/GCNProcessors.td @@ -171,3 +171,11 @@ def : ProcessorModel<"gfx1010", GFX10SpeedModel, FeatureISAVersion10_1_0.Features >; + +def : ProcessorModel<"gfx1011", GFX10SpeedModel, + FeatureISAVersion10_1_1.Features +>; + +def : ProcessorModel<"gfx1012", GFX10SpeedModel, + FeatureISAVersion10_1_2.Features +>; Index: lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp =================================================================== --- lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp +++ lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp @@ -93,6 +93,8 @@ case ELF::EF_AMDGPU_MACH_AMDGCN_GFX906: AK = GK_GFX906; break; case ELF::EF_AMDGPU_MACH_AMDGCN_GFX909: AK = GK_GFX909; break; case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1010: AK = GK_GFX1010; break; + case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1011: AK = GK_GFX1011; break; + case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1012: AK = GK_GFX1012; break; case ELF::EF_AMDGPU_MACH_NONE: AK = GK_NONE; break; } @@ -141,6 +143,8 @@ case GK_GFX906: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX906; case GK_GFX909: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX909; case GK_GFX1010: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1010; + case GK_GFX1011: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1011; + case GK_GFX1012: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1012; case GK_NONE: return ELF::EF_AMDGPU_MACH_NONE; } Index: lib/Target/AMDGPU/VOP2Instructions.td =================================================================== --- lib/Target/AMDGPU/VOP2Instructions.td +++ lib/Target/AMDGPU/VOP2Instructions.td @@ -318,6 +318,20 @@ def VOP_MAC_F16 : VOP_MAC ; def VOP_MAC_F32 : VOP_MAC ; +class VOP_DOT_ACC : VOP_MAC { + let HasClamp = 0; + let HasExtSDWA = 0; + let HasModifiers = 1; + let HasOpSel = 0; + let IsPacked = 0; +} + +def VOP_DOT_ACC_F32_V2F16 : VOP_DOT_ACC { + let Src0ModDPP = FPVRegInputMods; + let Src1ModDPP = FPVRegInputMods; +} +def VOP_DOT_ACC_I32_I32 : VOP_DOT_ACC; + // Write out to vcc or arbitrary SGPR. def VOP2b_I32_I1_I32_I32 : VOPProfile<[i32, i32, i32, untyped], 0, /*EnableClamp=*/1> { let Asm32 = "$vdst, vcc, $src0, $src1"; @@ -634,6 +648,31 @@ } // End SubtargetPredicate = HasDLInsts +let Constraints = "$vdst = $src2", + DisableEncoding="$src2", + isConvertibleToThreeAddress = 1, + isCommutable = 1 in { + let SubtargetPredicate = HasDot5Insts in + defm V_DOT2C_F32_F16 : VOP2Inst_e32<"v_dot2c_f32_f16", VOP_DOT_ACC_F32_V2F16>; + let SubtargetPredicate = HasDot6Insts in + defm V_DOT4C_I32_I8 : VOP2Inst_e32<"v_dot4c_i32_i8", VOP_DOT_ACC_I32_I32>; +} + +let AddedComplexity = 30 in { + def : GCNPat< + (f32 (AMDGPUfdot2 v2f16:$src0, v2f16:$src1, f32:$src2, (i1 DSTCLAMP.NONE))), + (f32 (V_DOT2C_F32_F16_e32 $src0, $src1, $src2)) + > { + let SubtargetPredicate = HasDot5Insts; + } + def : GCNPat< + (i32 (int_amdgcn_sdot4 i32:$src0, i32:$src1, i32:$src2, (i1 DSTCLAMP.NONE))), + (i32 (V_DOT4C_I32_I8_e32 $src0, $src1, $src2)) + > { + let SubtargetPredicate = HasDot6Insts; + } +} // End AddedComplexity = 30 + let SubtargetPredicate = isGFX10Plus in { def V_FMAMK_F32 : VOP2_Pseudo<"v_fmamk_f32", VOP_MADMK_F32, [], "">; @@ -1492,3 +1531,18 @@ defm V_XNOR_B32 : VOP2_Real_e32e64_vi <0x3d>; } // End SubtargetPredicate = HasDLInsts + +multiclass VOP2_Real_DOT_ACC_gfx10 op> : + VOP2_Real_e32_gfx10, + VOP2_Real_dpp_gfx10, + VOP2_Real_dpp8_gfx10; + +let SubtargetPredicate = HasDot5Insts in { + // NB: Opcode conflicts with V_DOT8C_I32_I4 + // This opcode exists in gfx 10.1* only + defm V_DOT2C_F32_F16 : VOP2_Real_DOT_ACC_gfx10<0x02>; +} + +let SubtargetPredicate = HasDot6Insts in { + defm V_DOT4C_I32_I8 : VOP2_Real_DOT_ACC_gfx10<0x0d>; +} Index: lib/Target/AMDGPU/VOP3PInstructions.td =================================================================== --- lib/Target/AMDGPU/VOP3PInstructions.td +++ lib/Target/AMDGPU/VOP3PInstructions.td @@ -412,3 +412,20 @@ defm V_FMA_MIX_F32 : VOP3P_Real_gfx10<0x020>; defm V_FMA_MIXLO_F16 : VOP3P_Real_gfx10<0x021>; defm V_FMA_MIXHI_F16 : VOP3P_Real_gfx10<0x022>; + +let SubtargetPredicate = HasDot2Insts in { + +defm V_DOT2_F32_F16 : VOP3P_Real_gfx10 <0x013>; +defm V_DOT2_I32_I16 : VOP3P_Real_gfx10 <0x014>; +defm V_DOT2_U32_U16 : VOP3P_Real_gfx10 <0x015>; +defm V_DOT4_U32_U8 : VOP3P_Real_gfx10 <0x017>; +defm V_DOT8_U32_U4 : VOP3P_Real_gfx10 <0x019>; + +} // End SubtargetPredicate = HasDot2Insts + +let SubtargetPredicate = HasDot1Insts in { + +defm V_DOT4_I32_I8 : VOP3P_Real_gfx10 <0x016>; +defm V_DOT8_I32_I4 : VOP3P_Real_gfx10 <0x018>; + +} // End SubtargetPredicate = HasDot1Insts Index: test/CodeGen/AMDGPU/elf-header-flags-mach.ll =================================================================== --- test/CodeGen/AMDGPU/elf-header-flags-mach.ll +++ test/CodeGen/AMDGPU/elf-header-flags-mach.ll @@ -48,6 +48,8 @@ ; RUN: llc -filetype=obj -march=amdgcn -mcpu=gfx906 < %s | llvm-readobj -file-headers - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX906 %s ; RUN: llc -filetype=obj -march=amdgcn -mcpu=gfx909 < %s | llvm-readobj -file-headers - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX909 %s ; RUN: llc -filetype=obj -march=amdgcn -mcpu=gfx1010 < %s | llvm-readobj -file-headers - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX1010 %s +; RUN: llc -filetype=obj -march=amdgcn -mcpu=gfx1011 < %s | llvm-readobj -file-headers - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX1011 %s +; RUN: llc -filetype=obj -march=amdgcn -mcpu=gfx1012 < %s | llvm-readobj -file-headers - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX1012 %s ; ARCH-R600: Arch: r600 ; ARCH-GCN: Arch: amdgcn @@ -89,6 +91,8 @@ ; GFX906: EF_AMDGPU_MACH_AMDGCN_GFX906 (0x2F) ; GFX909: EF_AMDGPU_MACH_AMDGCN_GFX909 (0x31) ; GFX1010: EF_AMDGPU_MACH_AMDGCN_GFX1010 (0x33) +; GFX1011: EF_AMDGPU_MACH_AMDGCN_GFX1011 (0x34) +; GFX1012: EF_AMDGPU_MACH_AMDGCN_GFX1012 (0x35) ; ALL: ] define amdgpu_kernel void @elf_header() { Index: test/CodeGen/AMDGPU/fdot2.ll =================================================================== --- test/CodeGen/AMDGPU/fdot2.ll +++ test/CodeGen/AMDGPU/fdot2.ll @@ -1,5 +1,7 @@ ; RUN: llc -march=amdgcn -mcpu=gfx900 -enable-unsafe-fp-math -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GCN,GFX900 -; RUN: llc -march=amdgcn -mcpu=gfx906 -enable-unsafe-fp-math -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GCN,GFX906-UNSAFE +; RUN: llc -march=amdgcn -mcpu=gfx906 -enable-unsafe-fp-math -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GCN,GCN-DL-UNSAFE,GFX906-DL-UNSAFE +; RUN: llc -march=amdgcn -mcpu=gfx1011 -enable-unsafe-fp-math -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GCN,GCN-DL-UNSAFE,GFX10-DL-UNSAFE,GFX10-CONTRACT +; RUN: llc -march=amdgcn -mcpu=gfx1012 -enable-unsafe-fp-math -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GCN,GCN-DL-UNSAFE,GFX10-DL-UNSAFE,GFX10-CONTRACT ; RUN: llc -march=amdgcn -mcpu=gfx906 -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GCN,GFX906 ; RUN: llc -march=amdgcn -mcpu=gfx906 -mattr=-fp64-fp16-denormals,-fp32-denormals -fp-contract=fast -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GCN,GFX906-CONTRACT ; RUN: llc -march=amdgcn -mcpu=gfx906 -mattr=+fp64-fp16-denormals,+fp32-denormals -fp-contract=fast -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GCN,GFX906-DENORM-CONTRACT @@ -14,7 +16,8 @@ ; GFX906: v_mul_f16_e32 ; GFX906: v_mul_f16_e32 -; GFX906-UNSAFE: v_fma_f16 +; GFX906-DL-UNSAFE: v_fma_f16 +; GFX10-CONTRACT: v_fmac_f16 ; GFX906-CONTRACT: v_mac_f16_e32 ; GFX906-DENORM-CONTRACT: v_fma_f16 @@ -50,7 +53,8 @@ ; GFX906: v_mad_f32 ; GFX906: v_mac_f32_e32 -; GFX906-UNSAFE: v_dot2_f32_f16 +; GFX906-DL-UNSAFE: v_dot2_f32_f16 +; GFX10-DL-UNSAFE: v_dot2c_f32_f16_e32 ; GFX906-CONTRACT: v_dot2_f32_f16 @@ -90,7 +94,8 @@ ; GFX906: v_mad_f32 ; GFX906: v_mac_f32_e32 -; GFX906-UNSAFE: v_dot2_f32_f16 +; GFX906-DL-UNSAFE: v_dot2_f32_f16 +; GFX10-DL-UNSAFE: v_dot2c_f32_f16_e32 ; GFX906-CONTRACT: v_dot2_f32_f16 ; GFX906-DENORM-CONTRACT: v_dot2_f32_f16 @@ -127,7 +132,7 @@ ; GFX906: v_mad_f32 ; GFX906: v_mac_f32_e32 -; GFX906-UNSAFE: v_fma_mix_f32 +; GCN-DL-UNSAFE: v_fma_mix_f32 ; GFX906-CONTRACT: v_fma_mix_f32 ; GFX906-DENORM-CONTRACT: v_fma_mix_f32 @@ -164,7 +169,7 @@ ; GFX906: v_mad_f32 ; GFX906: v_mac_f32_e32 -; GFX906-UNSAFE: v_fma_mix_f32 +; GCN-DL-UNSAFE: v_fma_mix_f32 ; GFX906-CONTRACT: v_fma_mix_f32 ; GFX906-DENORM-CONTRACT: v_fma_mix_f32 @@ -201,7 +206,7 @@ ; GFX906: v_mad_f32 ; GFX906: v_mac_f32_e32 -; GFX906-UNSAFE: v_fma_mix_f32 +; GCN-DL-UNSAFE: v_fma_mix_f32 ; GFX906-CONTRACT: v_fma_mix_f32 ; GFX906-DENORM-CONTRACT: v_fma_mix_f32 Index: test/CodeGen/AMDGPU/hsa-note-no-func.ll =================================================================== --- test/CodeGen/AMDGPU/hsa-note-no-func.ll +++ test/CodeGen/AMDGPU/hsa-note-no-func.ll @@ -25,6 +25,8 @@ ; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx906 -mattr=-code-object-v3 | FileCheck --check-prefix=HSA --check-prefix=HSA-GFX906 %s ; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx909 -mattr=-code-object-v3 | FileCheck --check-prefix=HSA --check-prefix=HSA-GFX909 %s ; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx1010 -mattr=-code-object-v3 | FileCheck --check-prefix=HSA --check-prefix=HSA-GFX1010 %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx1011 -mattr=-code-object-v3 | FileCheck --check-prefix=HSA --check-prefix=HSA-GFX1011 %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx1012 -mattr=-code-object-v3 | FileCheck --check-prefix=HSA --check-prefix=HSA-GFX1012 %s ; HSA: .hsa_code_object_version 2,1 ; HSA-SI600: .hsa_code_object_isa 6,0,0,"AMD","AMDGPU" @@ -44,3 +46,5 @@ ; HSA-GFX906: .hsa_code_object_isa 9,0,6,"AMD","AMDGPU" ; HSA-GFX909: .hsa_code_object_isa 9,0,9,"AMD","AMDGPU" ; HSA-GFX1010: .hsa_code_object_isa 10,1,0,"AMD","AMDGPU" +; HSA-GFX1011: .hsa_code_object_isa 10,1,1,"AMD","AMDGPU" +; HSA-GFX1012: .hsa_code_object_isa 10,1,2,"AMD","AMDGPU" Index: test/CodeGen/AMDGPU/lds-misaligned-bug.ll =================================================================== --- test/CodeGen/AMDGPU/lds-misaligned-bug.ll +++ test/CodeGen/AMDGPU/lds-misaligned-bug.ll @@ -1,4 +1,6 @@ ; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SPLIT %s +; RUN: llc -march=amdgcn -mcpu=gfx1011 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,VECT %s +; RUN: llc -march=amdgcn -mcpu=gfx1012 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SPLIT %s ; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs -mattr=+cumode < %s | FileCheck -check-prefixes=GCN,VECT %s ; GCN-LABEL: test_local_misaligned_v2: @@ -112,15 +114,17 @@ ret void } +; TODO: Reinstate the test below once v3i32/v3f32 is reinstated. + ; GCN-LABEL: test_flat_misaligned_v3: -; VECT-DAG: flat_load_dwordx3 v -; VECT-DAG: flat_store_dwordx3 v -; SPLIT-DAG: flat_load_dword v -; SPLIT-DAG: flat_load_dword v -; SPLIT-DAG: flat_load_dword v -; SPLIT-DAG: flat_store_dword v -; SPLIT-DAG: flat_store_dword v -; SPLIT-DAG: flat_store_dword v +; xVECT-DAG: flat_load_dwordx3 v +; xVECT-DAG: flat_store_dwordx3 v +; xSPLIT-DAG: flat_load_dword v +; xSPLIT-DAG: flat_load_dword v +; xSPLIT-DAG: flat_load_dword v +; xSPLIT-DAG: flat_store_dword v +; xSPLIT-DAG: flat_store_dword v +; xSPLIT-DAG: flat_store_dword v define amdgpu_kernel void @test_flat_misaligned_v3(i32* %arg) { bb: %lid = tail call i32 @llvm.amdgcn.workitem.id.x() Index: test/CodeGen/AMDGPU/llvm.amdgcn.fdot2.ll =================================================================== --- test/CodeGen/AMDGPU/llvm.amdgcn.fdot2.ll +++ test/CodeGen/AMDGPU/llvm.amdgcn.fdot2.ll @@ -1,9 +1,12 @@ -; RUN: llc -march=amdgcn -mcpu=gfx906 -verify-machineinstrs < %s | FileCheck %s --check-prefix=GFX906 +; RUN: llc -march=amdgcn -mcpu=gfx906 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=GCN,GFX906 +; RUN: llc -march=amdgcn -mcpu=gfx1011 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=GCN,GFX10 +; RUN: llc -march=amdgcn -mcpu=gfx1012 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=GCN,GFX10 declare float @llvm.amdgcn.fdot2(<2 x half> %a, <2 x half> %b, float %c, i1 %clamp) -; GFX906-LABEL: {{^}}test_llvm_amdgcn_fdot2_clamp +; GCN-LABEL: {{^}}test_llvm_amdgcn_fdot2_clamp ; GFX906: v_dot2_f32_f16 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} clamp{{$}} +; GFX10: v_dot2_f32_f16 v{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}} clamp{{$}} define amdgpu_kernel void @test_llvm_amdgcn_fdot2_clamp( float addrspace(1)* %r, <2 x half> addrspace(1)* %a, @@ -18,8 +21,9 @@ ret void } -; GFX906-LABEL: {{^}}test_llvm_amdgcn_fdot2_no_clamp +; GCN-LABEL: {{^}}test_llvm_amdgcn_fdot2_no_clamp ; GFX906: v_dot2_f32_f16 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}{{$}} +; GFX10: v_dot2c_f32_f16_e32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}{{$}} define amdgpu_kernel void @test_llvm_amdgcn_fdot2_no_clamp( float addrspace(1)* %r, <2 x half> addrspace(1)* %a, Index: test/CodeGen/AMDGPU/llvm.amdgcn.sdot2.ll =================================================================== --- test/CodeGen/AMDGPU/llvm.amdgcn.sdot2.ll +++ test/CodeGen/AMDGPU/llvm.amdgcn.sdot2.ll @@ -1,9 +1,12 @@ -; RUN: llc -march=amdgcn -mcpu=gfx906 -verify-machineinstrs < %s | FileCheck %s --check-prefix=GCN --check-prefix=GFX906 +; RUN: llc -march=amdgcn -mcpu=gfx906 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=GCN,GFX906 +; RUN: llc -march=amdgcn -mcpu=gfx1011 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=GCN,GFX10 +; RUN: llc -march=amdgcn -mcpu=gfx1012 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=GCN,GFX10 declare i32 @llvm.amdgcn.sdot2(<2 x i16> %a, <2 x i16> %b, i32 %c, i1 %clamp) ; GCN-LABEL: {{^}}test_llvm_amdgcn_sdot2_clamp ; GFX906: v_dot2_i32_i16 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} clamp{{$}} +; GFX10: v_dot2_i32_i16 v{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}} clamp{{$}} define amdgpu_kernel void @test_llvm_amdgcn_sdot2_clamp( i32 addrspace(1)* %r, <2 x i16> addrspace(1)* %a, @@ -20,6 +23,7 @@ ; GCN-LABEL: {{^}}test_llvm_amdgcn_sdot2_no_clamp ; GFX906: v_dot2_i32_i16 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}{{$}} +; GFX10: v_dot2_i32_i16 v{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}{{$}} define amdgpu_kernel void @test_llvm_amdgcn_sdot2_no_clamp( i32 addrspace(1)* %r, <2 x i16> addrspace(1)* %a, Index: test/CodeGen/AMDGPU/llvm.amdgcn.sdot4.ll =================================================================== --- test/CodeGen/AMDGPU/llvm.amdgcn.sdot4.ll +++ test/CodeGen/AMDGPU/llvm.amdgcn.sdot4.ll @@ -1,9 +1,12 @@ -; RUN: llc -march=amdgcn -mcpu=gfx906 -verify-machineinstrs < %s | FileCheck %s --check-prefix=GCN --check-prefix=GFX906 +; RUN: llc -march=amdgcn -mcpu=gfx906 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=GCN,GFX906 +; RUN: llc -march=amdgcn -mcpu=gfx1011 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=GCN,GFX10 +; RUN: llc -march=amdgcn -mcpu=gfx1012 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=GCN,GFX10 declare i32 @llvm.amdgcn.sdot4(i32 %a, i32 %b, i32 %c, i1 %clamp) ; GCN-LABEL: {{^}}test_llvm_amdgcn_sdot4_clamp ; GFX906: v_dot4_i32_i8 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} clamp{{$}} +; GFX10: v_dot4_i32_i8 v{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}} clamp{{$}} define amdgpu_kernel void @test_llvm_amdgcn_sdot4_clamp( i32 addrspace(1)* %r, <4 x i8> addrspace(1)* %a, @@ -22,6 +25,7 @@ ; GCN-LABEL: {{^}}test_llvm_amdgcn_sdot4_no_clamp ; GFX906: v_dot4_i32_i8 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}{{$}} +; GFX10: v_dot4c_i32_i8_e32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}{{$}} define amdgpu_kernel void @test_llvm_amdgcn_sdot4_no_clamp( i32 addrspace(1)* %r, <4 x i8> addrspace(1)* %a, Index: test/CodeGen/AMDGPU/llvm.amdgcn.sdot8.ll =================================================================== --- test/CodeGen/AMDGPU/llvm.amdgcn.sdot8.ll +++ test/CodeGen/AMDGPU/llvm.amdgcn.sdot8.ll @@ -1,9 +1,12 @@ -; RUN: llc -march=amdgcn -mcpu=gfx906 -verify-machineinstrs < %s | FileCheck %s --check-prefix=GCN --check-prefix=GFX906 +; RUN: llc -march=amdgcn -mcpu=gfx906 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=GCN,GFX906 +; RUN: llc -march=amdgcn -mcpu=gfx1011 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=GCN,GFX10,GFX1011 +; RUN: llc -march=amdgcn -mcpu=gfx1012 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=GCN,GFX10,GFX1011 declare i32 @llvm.amdgcn.sdot8(i32 %a, i32 %b, i32 %c, i1 %clamp) ; GCN-LABEL: {{^}}test_llvm_amdgcn_sdot8_clamp ; GFX906: v_dot8_i32_i4 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} clamp{{$}} +; GFX10: v_dot8_i32_i4 v{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}} clamp{{$}} define amdgpu_kernel void @test_llvm_amdgcn_sdot8_clamp( i32 addrspace(1)* %r, <8 x i4> addrspace(1)* %a, @@ -22,6 +25,7 @@ ; GCN-LABEL: {{^}}test_llvm_amdgcn_sdot8_no_clamp ; GFX906: v_dot8_i32_i4 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}{{$}} +; GFX1011: v_dot8_i32_i4 v{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}{{$}} define amdgpu_kernel void @test_llvm_amdgcn_sdot8_no_clamp( i32 addrspace(1)* %r, <8 x i4> addrspace(1)* %a, Index: test/CodeGen/AMDGPU/llvm.amdgcn.udot2.ll =================================================================== --- test/CodeGen/AMDGPU/llvm.amdgcn.udot2.ll +++ test/CodeGen/AMDGPU/llvm.amdgcn.udot2.ll @@ -1,9 +1,12 @@ -; RUN: llc -march=amdgcn -mcpu=gfx906 -verify-machineinstrs < %s | FileCheck %s --check-prefix=GCN --check-prefix=GFX906 +; RUN: llc -march=amdgcn -mcpu=gfx906 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=GCN,GFX906 +; RUN: llc -march=amdgcn -mcpu=gfx1011 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=GCN,GFX10 +; RUN: llc -march=amdgcn -mcpu=gfx1012 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=GCN,GFX10 declare i32 @llvm.amdgcn.udot2(<2 x i16> %a, <2 x i16> %b, i32 %c, i1 %clamp) ; GCN-LABEL: {{^}}test_llvm_amdgcn_udot2_clamp ; GFX906: v_dot2_u32_u16 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} clamp{{$}} +; GFX10: v_dot2_u32_u16 v{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}} clamp{{$}} define amdgpu_kernel void @test_llvm_amdgcn_udot2_clamp( i32 addrspace(1)* %r, <2 x i16> addrspace(1)* %a, @@ -20,6 +23,7 @@ ; GCN-LABEL: {{^}}test_llvm_amdgcn_udot2_no_clamp ; GFX906: v_dot2_u32_u16 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}{{$}} +; GFX10: v_dot2_u32_u16 v{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}{{$}} define amdgpu_kernel void @test_llvm_amdgcn_udot2_no_clamp( i32 addrspace(1)* %r, <2 x i16> addrspace(1)* %a, Index: test/CodeGen/AMDGPU/llvm.amdgcn.udot4.ll =================================================================== --- test/CodeGen/AMDGPU/llvm.amdgcn.udot4.ll +++ test/CodeGen/AMDGPU/llvm.amdgcn.udot4.ll @@ -1,9 +1,12 @@ -; RUN: llc -march=amdgcn -mcpu=gfx906 -verify-machineinstrs < %s | FileCheck %s --check-prefix=GCN --check-prefix=GFX906 +; RUN: llc -march=amdgcn -mcpu=gfx906 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=GCN,GFX906 +; RUN: llc -march=amdgcn -mcpu=gfx1011 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=GCN,GFX10 +; RUN: llc -march=amdgcn -mcpu=gfx1012 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=GCN,GFX10 declare i32 @llvm.amdgcn.udot4(i32 %a, i32 %b, i32 %c, i1 %clamp) ; GCN-LABEL: {{^}}test_llvm_amdgcn_udot4_clamp ; GFX906: v_dot4_u32_u8 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} clamp{{$}} +; GFX10: v_dot4_u32_u8 v{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}} clamp{{$}} define amdgpu_kernel void @test_llvm_amdgcn_udot4_clamp( i32 addrspace(1)* %r, <4 x i8> addrspace(1)* %a, @@ -22,6 +25,7 @@ ; GCN-LABEL: {{^}}test_llvm_amdgcn_udot4_no_clamp ; GFX906: v_dot4_u32_u8 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}{{$}} +; GFX10: v_dot4_u32_u8 v{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}{{$}} define amdgpu_kernel void @test_llvm_amdgcn_udot4_no_clamp( i32 addrspace(1)* %r, <4 x i8> addrspace(1)* %a, Index: test/CodeGen/AMDGPU/llvm.amdgcn.udot8.ll =================================================================== --- test/CodeGen/AMDGPU/llvm.amdgcn.udot8.ll +++ test/CodeGen/AMDGPU/llvm.amdgcn.udot8.ll @@ -1,9 +1,12 @@ -; RUN: llc -march=amdgcn -mcpu=gfx906 -verify-machineinstrs < %s | FileCheck %s --check-prefix=GCN --check-prefix=GFX906 +; RUN: llc -march=amdgcn -mcpu=gfx906 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=GCN,GFX906 +; RUN: llc -march=amdgcn -mcpu=gfx1011 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=GCN,GFX10 +; RUN: llc -march=amdgcn -mcpu=gfx1012 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=GCN,GFX10 declare i32 @llvm.amdgcn.udot8(i32 %a, i32 %b, i32 %c, i1 %clamp) ; GCN-LABEL: {{^}}test_llvm_amdgcn_udot8_clamp ; GFX906: v_dot8_u32_u4 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} clamp{{$}} +; GFX10: v_dot8_u32_u4 v{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}} clamp{{$}} define amdgpu_kernel void @test_llvm_amdgcn_udot8_clamp( i32 addrspace(1)* %r, <8 x i4> addrspace(1)* %a, @@ -22,6 +25,7 @@ ; GCN-LABEL: {{^}}test_llvm_amdgcn_udot8_no_clamp ; GFX906: v_dot8_u32_u4 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}{{$}} +; GFX10: v_dot8_u32_u4 v{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}{{$}} define amdgpu_kernel void @test_llvm_amdgcn_udot8_no_clamp( i32 addrspace(1)* %r, <8 x i4> addrspace(1)* %a, Index: test/MC/AMDGPU/gfx1011_dlops.s =================================================================== --- /dev/null +++ test/MC/AMDGPU/gfx1011_dlops.s @@ -0,0 +1,53 @@ +// RUN: llvm-mc -arch=amdgcn -mcpu=gfx1011 -show-encoding %s | FileCheck --check-prefix=GFX10 %s +// RUN: llvm-mc -arch=amdgcn -mcpu=gfx1012 -show-encoding %s | FileCheck --check-prefix=GFX10 %s + +v_dot2_f32_f16 v0, v1, v2, v3 +// GFX10: encoding: [0x00,0x40,0x13,0xcc,0x01,0x05,0x0e,0x1c] + +v_dot2_i32_i16 v0, v1, v2, v3 +// GFX10: encoding: [0x00,0x40,0x14,0xcc,0x01,0x05,0x0e,0x1c] + +v_dot2_u32_u16 v0, v1, v2, v3 +// GFX10: encoding: [0x00,0x40,0x15,0xcc,0x01,0x05,0x0e,0x1c] + +v_dot4_i32_i8 v0, v1, v2, v3 +// GFX10: encoding: [0x00,0x40,0x16,0xcc,0x01,0x05,0x0e,0x1c] + +v_dot4_u32_u8 v0, v1, v2, v3 +// GFX10: encoding: [0x00,0x40,0x17,0xcc,0x01,0x05,0x0e,0x1c] + +v_dot8_i32_i4 v0, v1, v2, v3 +// GFX10: encoding: [0x00,0x40,0x18,0xcc,0x01,0x05,0x0e,0x1c] + +v_dot8_u32_u4 v0, v1, v2, v3 +// GFX10: encoding: [0x00,0x40,0x19,0xcc,0x01,0x05,0x0e,0x1c] + +v_dot2c_f32_f16 v5, v1, v2 +// GFX10: encoding: [0x01,0x05,0x0a,0x04] + +v_dot2c_f32_f16 v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 +// GFX10: encoding: [0xfa,0x04,0x0a,0x04,0x01,0xe4,0x00,0x00] + +v_dot2c_f32_f16 v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 +// GFX10: encoding: [0xfa,0x04,0x0a,0x04,0x01,0xe4,0x04,0x00] + +v_dot2c_f32_f16 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX10: encoding: [0xe9,0x04,0x0a,0x04,0x01,0x77,0x39,0x05] + +v_dot2c_f32_f16 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX10: encoding: [0xea,0x04,0x0a,0x04,0x01,0x77,0x39,0x05] + +v_dot4c_i32_i8 v5, v1, v2 +// GFX10: encoding: [0x01,0x05,0x0a,0x1a] + +v_dot4c_i32_i8 v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 +// GFX10: encoding: [0xfa,0x04,0x0a,0x1a,0x01,0xe4,0x00,0x00] + +v_dot4c_i32_i8 v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 +// GFX10: encoding: [0xfa,0x04,0x0a,0x1a,0x01,0xe4,0x04,0x00] + +v_dot4c_i32_i8 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX10: encoding: [0xe9,0x04,0x0a,0x1a,0x01,0x77,0x39,0x05] + +v_dot4c_i32_i8 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX10: encoding: [0xea,0x04,0x0a,0x1a,0x01,0x77,0x39,0x05] Index: test/MC/AMDGPU/gfx1011_err.s =================================================================== --- /dev/null +++ test/MC/AMDGPU/gfx1011_err.s @@ -0,0 +1,50 @@ +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1011 -show-encoding %s 2>&1 | FileCheck --check-prefix=GFX10 %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1012 -show-encoding %s 2>&1 | FileCheck --check-prefix=GFX10 %s + +v_dot8c_i32_i4 v5, v1, v2 +// GFX10: error: + +v_dot8c_i32_i4 v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 +// GFX10: error: + +v_dot8c_i32_i4 v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 +// GFX10: error: + +v_dot8c_i32_i4 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX10: error: + +v_dot8c_i32_i4 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX10: error: + +s_getreg_b32 s2, hwreg(HW_REG_SHADER_CYCLES) +// GFX10: error: + +v_fma_legacy_f32 v0, v1, v2, v3 +// GFX10: error: + +image_bvh_intersect_ray v[4:7], v[9:24], s[4:7] +// GFX10: error: + +image_bvh_intersect_ray v[4:7], v[9:16], s[4:7] a16 +// GFX10: error: + +image_bvh64_intersect_ray v[4:7], v[9:24], s[4:7] +// GFX10: error: + +image_bvh64_intersect_ray v[4:7], v[9:24], s[4:7] a16 +// GFX10: error: + +image_msaa_load v[1:4], v5, s[8:15] dmask:0xf dim:SQ_RSRC_IMG_1D +// GFX10: error: + +image_msaa_load v[1:4], v5, s[8:15] dmask:0xf dim:SQ_RSRC_IMG_1D glc +// GFX10: error: + +image_msaa_load v5, v[1:2], s[8:15] dmask:0x1 dim:SQ_RSRC_IMG_2D d16 +// GFX10: error: + +image_msaa_load v[1:4], v5, s[8:15] dmask:0xf dim:SQ_RSRC_IMG_1D +// GFX10: error: + +image_msaa_load v14, [v204,v11,v14,v19], s[40:47] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA_ARRAY +// GFX10: error: Index: test/MC/AMDGPU/smem.s =================================================================== --- test/MC/AMDGPU/smem.s +++ test/MC/AMDGPU/smem.s @@ -2,38 +2,46 @@ // RUN: not llvm-mc -arch=amdgcn -mcpu=kaveri -show-encoding %s | FileCheck -check-prefix=GCN -check-prefix=SICI %s // RUN: not llvm-mc -arch=amdgcn -mcpu=tonga -show-encoding %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=GFX89 %s // RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 -show-encoding %s | FileCheck -check-prefix=GCN -check-prefix=GFX89 -check-prefix=GFX9 %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=tahiti %s 2>&1 | FileCheck -check-prefix=NOSICI -check-prefix=NOSICIVI %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=bonaire %s 2>&1 | FileCheck -check-prefix=NOSICI -check-prefix=NOSICIVI %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=kaveri %s 2>&1 | FileCheck -check-prefix=NOSICI -check-prefix=NOSICIVI %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga %s 2>&1 | FileCheck -check-prefix=NOSICIVI -check-prefix=NOVI %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1012 -show-encoding %s | FileCheck -check-prefix=GCN -check-prefix=GFX10 -check-prefix=GFX1012 %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=tahiti %s 2>&1 | FileCheck -check-prefix=NOSICI -check-prefix=NOSICIVI -check-prefix=NOSICIGFX10 -check-prefix=NOSICIVIGFX10 %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=bonaire %s 2>&1 | FileCheck -check-prefix=NOSICI -check-prefix=NOSICIVI -check-prefix=NOSICIGFX10 -check-prefix=NOSICIVIGFX10 %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=kaveri %s 2>&1 | FileCheck -check-prefix=NOSICI -check-prefix=NOSICIVI -check-prefix=NOSICIGFX10 -check-prefix=NOSICIVIGFX10 %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga %s 2>&1 | FileCheck -check-prefix=NOSICIVI -check-prefix=NOVI -check-prefix=NOSICIVIGFX10 %s // RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 %s 2>&1 | FileCheck -check-prefix=NOGFX9 %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1012 %s 2>&1 | FileCheck -check-prefix=NOSICIGFX10 -check-prefix=NOGFX9 %s s_dcache_wb // GFX89: s_dcache_wb ; encoding: [0x00,0x00,0x84,0xc0,0x00,0x00,0x00,0x00] +// GFX1012: s_dcache_wb ; encoding: [0x00,0x00,0x84,0xf4,0x00,0x00,0x00,0x00] // NOSICI: error: instruction not supported on this GPU s_dcache_wb_vol // GFX89: s_dcache_wb_vol ; encoding: [0x00,0x00,0x8c,0xc0,0x00,0x00,0x00,0x00] -// NOSICI: error: instruction not supported on this GPU +// NOSICIGFX10: error: instruction not supported on this GPU s_atc_probe 0x7, s[4:5], s0 // GFX89: s_atc_probe 7, s[4:5], s0 ; encoding: [0xc2,0x01,0x98,0xc0,0x00,0x00,0x00,0x00] +// GFX10: s_atc_probe 7, s[4:5], s0 ; encoding: [0xc2,0x01,0x98,0xf4,0x00,0x00,0x00,0x00] // NOSICI: error: instruction not supported on this GPU s_atc_probe 0x0, s[4:5], 0x0 // GFX89: s_atc_probe 0, s[4:5], 0x0 ; encoding: [0x02,0x00,0x9a,0xc0,0x00,0x00,0x00,0x00] +// GFX10: s_atc_probe 0, s[4:5], 0x0 ; encoding: [0x02,0x00,0x98,0xf4,0x00,0x00,0x00,0xfa] // NOSICI: error: instruction not supported on this GPU s_atc_probe_buffer 0x1, s[8:11], s0 // GFX89: s_atc_probe_buffer 1, s[8:11], s0 ; encoding: [0x44,0x00,0x9c,0xc0,0x00,0x00,0x00,0x00] +// GFX10: s_atc_probe_buffer 1, s[8:11], s0 ; encoding: [0x44,0x00,0x9c,0xf4,0x00,0x00,0x00,0x00] // NOSICI: error: instruction not supported on this GPU s_atc_probe_buffer 0x0, s[8:11], s101 // GFX89: s_atc_probe_buffer 0, s[8:11], s101 ; encoding: [0x04,0x00,0x9c,0xc0,0x65,0x00,0x00,0x00] +// GFX10: s_atc_probe_buffer 0, s[8:11], s101 ; encoding: [0x04,0x00,0x9c,0xf4,0x00,0x00,0x00,0xca] // NOSICI: error: instruction not supported on this GPU s_memrealtime s[4:5] // GFX89: s_memrealtime s[4:5] ; encoding: [0x00,0x01,0x94,0xc0,0x00,0x00,0x00,0x00] +// GFX10: s_memrealtime s[4:5] ; encoding: [0x00,0x01,0x94,0xf4,0x00,0x00,0x00,0x00] // NOSICI: error: instruction not supported on this GPU s_memrealtime tba @@ -47,25 +55,30 @@ // NOGFX9: error: not a valid operand. s_memrealtime ttmp[0:1] -// VI: s_memrealtime ttmp[0:1] ; encoding: [0x00,0x1c,0x94,0xc0,0x00,0x00,0x00,0x00] -// GFX9: s_memrealtime ttmp[0:1] ; encoding: [0x00,0x1b,0x94,0xc0,0x00,0x00,0x00,0x00] +// VI: s_memrealtime ttmp[0:1] ; encoding: [0x00,0x1c,0x94,0xc0,0x00,0x00,0x00,0x00] +// GFX9: s_memrealtime ttmp[0:1] ; encoding: [0x00,0x1b,0x94,0xc0,0x00,0x00,0x00,0x00] +// GFX10: s_memrealtime ttmp[0:1] ; encoding: [0x00,0x1b,0x94,0xf4,0x00,0x00,0x00,0x00] // NOSICI: error: instruction not supported on this GPU // FIXME: Should error about instruction on GPU s_store_dword s1, s[2:3], 0xfc // GFX89: s_store_dword s1, s[2:3], 0xfc ; encoding: [0x41,0x00,0x42,0xc0,0xfc,0x00,0x00,0x00] +// GFX1012: s_store_dword s1, s[2:3], 0xfc ; encoding: [0x41,0x00,0x40,0xf4,0xfc,0x00,0x00,0xfa] // NOSICI: error: instruction not supported on this GPU s_store_dword s1, s[2:3], 0xfc glc // GFX89: s_store_dword s1, s[2:3], 0xfc glc ; encoding: [0x41,0x00,0x43,0xc0,0xfc,0x00,0x00,0x00] +// GFX1012: s_store_dword s1, s[2:3], 0xfc glc ; encoding: [0x41,0x00,0x41,0xf4,0xfc,0x00,0x00,0xfa] // NOSICI: error: invalid operand for instruction s_store_dword s1, s[2:3], s4 // GFX89: s_store_dword s1, s[2:3], s4 ; encoding: [0x41,0x00,0x40,0xc0,0x04,0x00,0x00,0x00] +// GFX1012: s_store_dword s1, s[2:3], s4 ; encoding: [0x41,0x00,0x40,0xf4,0x00,0x00,0x00,0x08] // NOSICI: error: instruction not supported on this GPU s_store_dword s1, s[2:3], s4 glc // GFX89: s_store_dword s1, s[2:3], s4 glc ; encoding: [0x41,0x00,0x41,0xc0,0x04,0x00,0x00,0x00] +// GFX1012: s_store_dword s1, s[2:3], s4 glc ; encoding: [0x41,0x00,0x41,0xf4,0x00,0x00,0x00,0x08] // NOSICI: error: invalid operand for instruction s_store_dword tba_lo, s[2:3], s4 @@ -91,9 +104,11 @@ // FIXME: Should error on SI instead of silently ignoring glc s_load_dword s1, s[2:3], 0xfc glc // GFX89: s_load_dword s1, s[2:3], 0xfc glc ; encoding: [0x41,0x00,0x03,0xc0,0xfc,0x00,0x00,0x00] +// GFX10: s_load_dword s1, s[2:3], 0xfc glc ; encoding: [0x41,0x00,0x01,0xf4,0xfc,0x00,0x00,0xfa] s_load_dword s1, s[2:3], s4 glc // GFX89: s_load_dword s1, s[2:3], s4 glc ; encoding: [0x41,0x00,0x01,0xc0,0x04,0x00,0x00,0x00] +// GFX10: s_load_dword s1, s[2:3], s4 glc ; encoding: [0x41,0x00,0x01,0xf4,0x00,0x00,0x00,0x08] s_buffer_store_dword s10, s[92:95], m0 // GFX89: s_buffer_store_dword s10, s[92:95], m0 ; encoding: [0xae,0x02,0x60,0xc0,0x7c,0x00,0x00,0x00] @@ -139,7 +154,7 @@ s_buffer_load_dword s10, s[92:95], m0 // GFX89: s_buffer_load_dword s10, s[92:95], m0 ; encoding: [0xae,0x02,0x20,0xc0,0x7c,0x00,0x00,0x00] -// SICI: s_buffer_load_dword s10, s[92:95], m0 ; encoding: [0x7c,0x5c,0x05,0xc2] +// SICIGFX10: s_buffer_load_dword s10, s[92:95], m0 ; encoding: [0x7c,0x5c,0x05,0xc2] s_buffer_load_dword tba_lo, s[92:95], m0 // VI: s_buffer_load_dword tba_lo, s[92:95], m0 ; encoding: [0x2e,0x1b,0x20,0xc0,0x7c,0x00,0x00,0x00] @@ -162,13 +177,15 @@ // NOGFX9: error: not a valid operand. s_buffer_load_dword ttmp0, s[92:95], m0 -// VI: s_buffer_load_dword ttmp0, s[92:95], m0 ; encoding: [0x2e,0x1c,0x20,0xc0,0x7c,0x00,0x00,0x00] -// GFX9: s_buffer_load_dword ttmp0, s[92:95], m0 ; encoding: [0x2e,0x1b,0x20,0xc0,0x7c,0x00,0x00,0x00] -// SICI: s_buffer_load_dword ttmp0, s[92:95], m0 ; encoding: [0x7c,0x5c,0x38,0xc2] +// VI: s_buffer_load_dword ttmp0, s[92:95], m0 ; encoding: [0x2e,0x1c,0x20,0xc0,0x7c,0x00,0x00,0x00] +// GFX9: s_buffer_load_dword ttmp0, s[92:95], m0 ; encoding: [0x2e,0x1b,0x20,0xc0,0x7c,0x00,0x00,0x00] +// SICI: s_buffer_load_dword ttmp0, s[92:95], m0 ; encoding: [0x7c,0x5c,0x38,0xc2] +// GFX10: s_buffer_load_dword ttmp0, s[92:95], m0 ; encoding: [0x2e,0x1b,0x20,0xf4,0x00,0x00,0x00,0xf8] s_buffer_load_dwordx2 s[10:11], s[92:95], m0 // GFX89: s_buffer_load_dwordx2 s[10:11], s[92:95], m0 ; encoding: [0xae,0x02,0x24,0xc0,0x7c,0x00,0x00,0x00] -// SICI: s_buffer_load_dwordx2 s[10:11], s[92:95], m0 ; encoding: [0x7c,0x5c,0x45,0xc2] +// SICI: s_buffer_load_dwordx2 s[10:11], s[92:95], m0 ; encoding: [0x7c,0x5c,0x45,0xc2] +// GFX10: s_buffer_load_dwordx2 s[10:11], s[92:95], m0 ; encoding: [0xae,0x02,0x24,0xf4,0x00,0x00,0x00,0xf8] s_buffer_load_dwordx2 tba, s[92:95], m0 // VI: s_buffer_load_dwordx2 tba, s[92:95], m0 ; encoding: [0x2e,0x1b,0x24,0xc0,0x7c,0x00,0x00,0x00] @@ -181,13 +198,15 @@ // NOGFX9: error: not a valid operand. s_buffer_load_dwordx2 ttmp[0:1], s[92:95], m0 -// VI: s_buffer_load_dwordx2 ttmp[0:1], s[92:95], m0 ; encoding: [0x2e,0x1c,0x24,0xc0,0x7c,0x00,0x00,0x00] -// GFX9: s_buffer_load_dwordx2 ttmp[0:1], s[92:95], m0 ; encoding: [0x2e,0x1b,0x24,0xc0,0x7c,0x00,0x00,0x00] -// SICI: s_buffer_load_dwordx2 ttmp[0:1], s[92:95], m0 ; encoding: [0x7c,0x5c,0x78,0xc2] +// VI: s_buffer_load_dwordx2 ttmp[0:1], s[92:95], m0 ; encoding: [0x2e,0x1c,0x24,0xc0,0x7c,0x00,0x00,0x00] +// GFX9: s_buffer_load_dwordx2 ttmp[0:1], s[92:95], m0 ; encoding: [0x2e,0x1b,0x24,0xc0,0x7c,0x00,0x00,0x00] +// SICI: s_buffer_load_dwordx2 ttmp[0:1], s[92:95], m0 ; encoding: [0x7c,0x5c,0x78,0xc2] +// GFX10: s_buffer_load_dwordx2 ttmp[0:1], s[92:95], m0 ; encoding: [0x2e,0x1b,0x24,0xf4,0x00,0x00,0x00,0xf8] // FIXME: Should error on SI instead of silently ignoring glc s_buffer_load_dwordx4 s[8:11], s[92:95], m0 glc // GFX89: s_buffer_load_dwordx4 s[8:11], s[92:95], m0 glc ; encoding: [0x2e,0x02,0x29,0xc0,0x7c,0x00,0x00,0x00] +// GFX10: s_buffer_load_dwordx4 s[8:11], s[92:95], m0 glc ; encoding: [0x2e,0x02,0x29,0xf4,0x00,0x00,0x00,0xf8] //===----------------------------------------------------------------------===// // s_scratch instructions @@ -195,38 +214,47 @@ s_scratch_load_dword s5, s[2:3], s101 // GFX9: s_scratch_load_dword s5, s[2:3], s101 ; encoding: [0x41,0x01,0x14,0xc0,0x65,0x00,0x00,0x00] +// GFX1012: s_scratch_load_dword s5, s[2:3], s101 ; encoding: [0x41,0x01,0x14,0xf4,0x00,0x00,0x00,0xca] // NOSICIVI: error: instruction not supported on this GPU s_scratch_load_dword s5, s[2:3], s0 glc // GFX9: s_scratch_load_dword s5, s[2:3], s0 glc ; encoding: [0x41,0x01,0x15,0xc0,0x00,0x00,0x00,0x00] +// GFX1012: s_scratch_load_dword s5, s[2:3], s0 glc ; encoding: [0x41,0x01,0x15,0xf4,0x00,0x00,0x00,0x00] // NOSICIVI: error s_scratch_load_dwordx2 s[100:101], s[2:3], s0 // GFX9: s_scratch_load_dwordx2 s[100:101], s[2:3], s0 ; encoding: [0x01,0x19,0x18,0xc0,0x00,0x00,0x00,0x00] +// GFX1012: s_scratch_load_dwordx2 s[100:101], s[2:3], s0 ; encoding: [0x01,0x19,0x18,0xf4,0x00,0x00,0x00,0x00] // NOSICIVI: error: instruction not supported on this GPU s_scratch_load_dwordx2 s[10:11], s[2:3], 0x1 glc // GFX9: s_scratch_load_dwordx2 s[10:11], s[2:3], 0x1 glc ; encoding: [0x81,0x02,0x1b,0xc0,0x01,0x00,0x00,0x00] +// GFX1012: s_scratch_load_dwordx2 s[10:11], s[2:3], 0x1 glc ; encoding: [0x81,0x02,0x19,0xf4,0x01,0x00,0x00,0xfa] // NOSICIVI: error s_scratch_load_dwordx4 s[20:23], s[4:5], s0 // GFX9: s_scratch_load_dwordx4 s[20:23], s[4:5], s0 ; encoding: [0x02,0x05,0x1c,0xc0,0x00,0x00,0x00,0x00] +// GFX1012: s_scratch_load_dwordx4 s[20:23], s[4:5], s0 ; encoding: [0x02,0x05,0x1c,0xf4,0x00,0x00,0x00,0x00] // NOSICIVI: error: instruction not supported on this GPU s_scratch_store_dword s101, s[4:5], s0 // GFX9: s_scratch_store_dword s101, s[4:5], s0 ; encoding: [0x42,0x19,0x54,0xc0,0x00,0x00,0x00,0x00] +// GFX1012: s_scratch_store_dword s101, s[4:5], s0 ; encoding: [0x42,0x19,0x54,0xf4,0x00,0x00,0x00,0x00] // NOSICIVI: error: instruction not supported on this GPU s_scratch_store_dword s1, s[4:5], 0x123 glc // GFX9: s_scratch_store_dword s1, s[4:5], 0x123 glc ; encoding: [0x42,0x00,0x57,0xc0,0x23,0x01,0x00,0x00] +// GFX1012: s_scratch_store_dword s1, s[4:5], 0x123 glc ; encoding: [0x42,0x00,0x55,0xf4,0x23,0x01,0x00,0xfa] // NOSICIVI: error s_scratch_store_dwordx2 s[2:3], s[4:5], s101 glc // GFX9: s_scratch_store_dwordx2 s[2:3], s[4:5], s101 glc ; encoding: [0x82,0x00,0x59,0xc0,0x65,0x00,0x00,0x00] +// GFX1012: s_scratch_store_dwordx2 s[2:3], s[4:5], s101 glc ; encoding: [0x82,0x00,0x59,0xf4,0x00,0x00,0x00,0xca] // NOSICIVI: error s_scratch_store_dwordx4 s[4:7], s[4:5], s0 glc // GFX9: s_scratch_store_dwordx4 s[4:7], s[4:5], s0 glc ; encoding: [0x02,0x01,0x5d,0xc0,0x00,0x00,0x00,0x00] +// GFX1012: s_scratch_store_dwordx4 s[4:7], s[4:5], s0 glc ; encoding: [0x02,0x01,0x5d,0xf4,0x00,0x00,0x00,0x00] // NOSICIVI: error //===----------------------------------------------------------------------===// @@ -235,18 +263,22 @@ s_dcache_discard s[2:3], s0 // GFX9: s_dcache_discard s[2:3], s0 ; encoding: [0x01,0x00,0xa0,0xc0,0x00,0x00,0x00,0x00] +// GFX1012: s_dcache_discard s[2:3], s0 ; encoding: [0x01,0x00,0xa0,0xf4,0x00,0x00,0x00,0x00] // NOSICIVI: error: instruction not supported on this GPU s_dcache_discard s[2:3], 0x0 // GFX9: s_dcache_discard s[2:3], 0x0 ; encoding: [0x01,0x00,0xa2,0xc0,0x00,0x00,0x00,0x00] +// GFX1012: s_dcache_discard s[2:3], 0x0 ; encoding: [0x01,0x00,0xa0,0xf4,0x00,0x00,0x00,0xfa] // NOSICIVI: error: instruction not supported on this GPU s_dcache_discard_x2 s[2:3], s101 // GFX9: s_dcache_discard_x2 s[2:3], s101 ; encoding: [0x01,0x00,0xa4,0xc0,0x65,0x00,0x00,0x00] +// GFX1012: s_dcache_discard_x2 s[2:3], s101 ; encoding: [0x01,0x00,0xa4,0xf4,0x00,0x00,0x00,0xca] // NOSICIVI: error: instruction not supported on this GPU s_dcache_discard_x2 s[2:3], 0x0 // GFX9: s_dcache_discard_x2 s[2:3], 0x0 ; encoding: [0x01,0x00,0xa6,0xc0,0x00,0x00,0x00,0x00] +// GFX1012: s_dcache_discard_x2 s[2:3], 0x0 ; encoding: [0x01,0x00,0xa4,0xf4,0x00,0x00,0x00,0xfa] // NOSICIVI: error: instruction not supported on this GPU //===----------------------------------------------------------------------===// @@ -255,130 +287,162 @@ s_atomic_add s5, s[2:3], s101 // GFX9: s_atomic_add s5, s[2:3], s101 ; encoding: [0x41,0x01,0x08,0xc2,0x65,0x00,0x00,0x00] +// GFX1012: s_atomic_add s5, s[2:3], s101 ; encoding: [0x41,0x01,0x08,0xf6,0x00,0x00,0x00,0xca] // NOSICIVI: error: s_atomic_add s5, s[2:3], 0x0 // GFX9: s_atomic_add s5, s[2:3], 0x0 ; encoding: [0x41,0x01,0x0a,0xc2,0x00,0x00,0x00,0x00] +// GFX1012: s_atomic_add s5, s[2:3], 0x0 ; encoding: [0x41,0x01,0x08,0xf6,0x00,0x00,0x00,0xfa] // NOSICIVI: error: s_atomic_add s5, s[2:3], s0 glc // GFX9: s_atomic_add s5, s[2:3], s0 glc ; encoding: [0x41,0x01,0x09,0xc2,0x00,0x00,0x00,0x00] +// GFX1012: s_atomic_add s5, s[2:3], s0 glc ; encoding: [0x41,0x01,0x09,0xf6,0x00,0x00,0x00,0x00] // NOSICIVI: error: s_atomic_add_x2 s[10:11], s[2:3], s101 // GFX9: s_atomic_add_x2 s[10:11], s[2:3], s101 ; encoding: [0x81,0x02,0x88,0xc2,0x65,0x00,0x00,0x00] +// GFX1012: s_atomic_add_x2 s[10:11], s[2:3], s101 ; encoding: [0x81,0x02,0x88,0xf6,0x00,0x00,0x00,0xca] // NOSICIVI: error: s_atomic_and s5, s[2:3], s101 // GFX9: s_atomic_and s5, s[2:3], s101 ; encoding: [0x41,0x01,0x20,0xc2,0x65,0x00,0x00,0x00] +// GFX1012: s_atomic_and s5, s[2:3], s101 ; encoding: [0x41,0x01,0x20,0xf6,0x00,0x00,0x00,0xca] // NOSICIVI: error: s_atomic_and_x2 s[10:11], s[2:3], 0x0 // GFX9: s_atomic_and_x2 s[10:11], s[2:3], 0x0 ; encoding: [0x81,0x02,0xa2,0xc2,0x00,0x00,0x00,0x00] +// GFX1012: s_atomic_and_x2 s[10:11], s[2:3], 0x0 ; encoding: [0x81,0x02,0xa0,0xf6,0x00,0x00,0x00,0xfa] // NOSICIVI: error: s_atomic_cmpswap s[10:11], s[2:3], s101 // GFX9: s_atomic_cmpswap s[10:11], s[2:3], s101 ; encoding: [0x81,0x02,0x04,0xc2,0x65,0x00,0x00,0x00] +// GFX1012: s_atomic_cmpswap s[10:11], s[2:3], s101 ; encoding: [0x81,0x02,0x04,0xf6,0x00,0x00,0x00,0xca] // NOSICIVI: error: s_atomic_cmpswap s[10:11], s[2:3], 0x0 // GFX9: s_atomic_cmpswap s[10:11], s[2:3], 0x0 ; encoding: [0x81,0x02,0x06,0xc2,0x00,0x00,0x00,0x00] +// GFX1012: s_atomic_cmpswap s[10:11], s[2:3], 0x0 ; encoding: [0x81,0x02,0x04,0xf6,0x00,0x00,0x00,0xfa] // NOSICIVI: error: s_atomic_cmpswap s[10:11], s[2:3], s0 glc // GFX9: s_atomic_cmpswap s[10:11], s[2:3], s0 glc ; encoding: [0x81,0x02,0x05,0xc2,0x00,0x00,0x00,0x00] +// GFX1012: s_atomic_cmpswap s[10:11], s[2:3], s0 glc ; encoding: [0x81,0x02,0x05,0xf6,0x00,0x00,0x00,0x00] // NOSICIVI: error: s_atomic_cmpswap_x2 s[20:23], s[2:3], s101 // GFX9: s_atomic_cmpswap_x2 s[20:23], s[2:3], s101 ; encoding: [0x01,0x05,0x84,0xc2,0x65,0x00,0x00,0x00] +// GFX1012: s_atomic_cmpswap_x2 s[20:23], s[2:3], s101 ; encoding: [0x01,0x05,0x84,0xf6,0x00,0x00,0x00,0xca] // NOSICIVI: error: s_atomic_cmpswap_x2 s[20:23], s[2:3], 0x0 // GFX9: s_atomic_cmpswap_x2 s[20:23], s[2:3], 0x0 ; encoding: [0x01,0x05,0x86,0xc2,0x00,0x00,0x00,0x00] +// GFX1012: s_atomic_cmpswap_x2 s[20:23], s[2:3], 0x0 ; encoding: [0x01,0x05,0x84,0xf6,0x00,0x00,0x00,0xfa] // NOSICIVI: error: s_atomic_cmpswap_x2 s[20:23], s[2:3], s0 glc // GFX9: s_atomic_cmpswap_x2 s[20:23], s[2:3], s0 glc ; encoding: [0x01,0x05,0x85,0xc2,0x00,0x00,0x00,0x00] +// GFX1012: s_atomic_cmpswap_x2 s[20:23], s[2:3], s0 glc ; encoding: [0x01,0x05,0x85,0xf6,0x00,0x00,0x00,0x00] // NOSICIVI: error: s_atomic_dec s5, s[2:3], s0 glc // GFX9: s_atomic_dec s5, s[2:3], s0 glc ; encoding: [0x41,0x01,0x31,0xc2,0x00,0x00,0x00,0x00] +// GFX1012: s_atomic_dec s5, s[2:3], s0 glc ; encoding: [0x41,0x01,0x31,0xf6,0x00,0x00,0x00,0x00] // NOSICIVI: error: s_atomic_dec_x2 s[10:11], s[2:3], s101 // GFX9: s_atomic_dec_x2 s[10:11], s[2:3], s101 ; encoding: [0x81,0x02,0xb0,0xc2,0x65,0x00,0x00,0x00] +// GFX1012: s_atomic_dec_x2 s[10:11], s[2:3], s101 ; encoding: [0x81,0x02,0xb0,0xf6,0x00,0x00,0x00,0xca] // NOSICIVI: error: s_atomic_inc s5, s[2:3], s0 glc // GFX9: s_atomic_inc s5, s[2:3], s0 glc ; encoding: [0x41,0x01,0x2d,0xc2,0x00,0x00,0x00,0x00] +// GFX1012: s_atomic_inc s5, s[2:3], s0 glc ; encoding: [0x41,0x01,0x2d,0xf6,0x00,0x00,0x00,0x00] // NOSICIVI: error: s_atomic_inc_x2 s[10:11], s[2:3], s101 // GFX9: s_atomic_inc_x2 s[10:11], s[2:3], s101 ; encoding: [0x81,0x02,0xac,0xc2,0x65,0x00,0x00,0x00] +// GFX1012: s_atomic_inc_x2 s[10:11], s[2:3], s101 ; encoding: [0x81,0x02,0xac,0xf6,0x00,0x00,0x00,0xca] // NOSICIVI: error: s_atomic_or s5, s[2:3], 0x0 // GFX9: s_atomic_or s5, s[2:3], 0x0 ; encoding: [0x41,0x01,0x26,0xc2,0x00,0x00,0x00,0x00] +// GFX1012: s_atomic_or s5, s[2:3], 0x0 ; encoding: [0x41,0x01,0x24,0xf6,0x00,0x00,0x00,0xfa] // NOSICIVI: error: s_atomic_or_x2 s[10:11], s[2:3], s0 glc // GFX9: s_atomic_or_x2 s[10:11], s[2:3], s0 glc ; encoding: [0x81,0x02,0xa5,0xc2,0x00,0x00,0x00,0x00] +// GFX1012: s_atomic_or_x2 s[10:11], s[2:3], s0 glc ; encoding: [0x81,0x02,0xa5,0xf6,0x00,0x00,0x00,0x00] // NOSICIVI: error: s_atomic_smax s5, s[2:3], s101 // GFX9: s_atomic_smax s5, s[2:3], s101 ; encoding: [0x41,0x01,0x18,0xc2,0x65,0x00,0x00,0x00] +// GFX1012: s_atomic_smax s5, s[2:3], s101 ; encoding: [0x41,0x01,0x18,0xf6,0x00,0x00,0x00,0xca] // NOSICIVI: error: s_atomic_smax_x2 s[10:11], s[2:3], s0 glc // GFX9: s_atomic_smax_x2 s[10:11], s[2:3], s0 glc ; encoding: [0x81,0x02,0x99,0xc2,0x00,0x00,0x00,0x00] +// GFX1012: s_atomic_smax_x2 s[10:11], s[2:3], s0 glc ; encoding: [0x81,0x02,0x99,0xf6,0x00,0x00,0x00,0x00] // NOSICIVI: error: s_atomic_smin s5, s[2:3], s101 // GFX9: s_atomic_smin s5, s[2:3], s101 ; encoding: [0x41,0x01,0x10,0xc2,0x65,0x00,0x00,0x00] +// GFX1012: s_atomic_smin s5, s[2:3], s101 ; encoding: [0x41,0x01,0x10,0xf6,0x00,0x00,0x00,0xca] // NOSICIVI: error: s_atomic_smin_x2 s[10:11], s[2:3], s0 glc // GFX9: s_atomic_smin_x2 s[10:11], s[2:3], s0 glc ; encoding: [0x81,0x02,0x91,0xc2,0x00,0x00,0x00,0x00] +// GFX1012: s_atomic_smin_x2 s[10:11], s[2:3], s0 glc ; encoding: [0x81,0x02,0x91,0xf6,0x00,0x00,0x00,0x00] // NOSICIVI: error: s_atomic_sub s5, s[2:3], s101 // GFX9: s_atomic_sub s5, s[2:3], s101 ; encoding: [0x41,0x01,0x0c,0xc2,0x65,0x00,0x00,0x00] +// GFX1012: s_atomic_sub s5, s[2:3], s101 ; encoding: [0x41,0x01,0x0c,0xf6,0x00,0x00,0x00,0xca] // NOSICIVI: error: s_atomic_sub_x2 s[10:11], s[2:3], s0 glc // GFX9: s_atomic_sub_x2 s[10:11], s[2:3], s0 glc ; encoding: [0x81,0x02,0x8d,0xc2,0x00,0x00,0x00,0x00] +// GFX1012: s_atomic_sub_x2 s[10:11], s[2:3], s0 glc ; encoding: [0x81,0x02,0x8d,0xf6,0x00,0x00,0x00,0x00] // NOSICIVI: error: s_atomic_swap s5, s[2:3], s101 // GFX9: s_atomic_swap s5, s[2:3], s101 ; encoding: [0x41,0x01,0x00,0xc2,0x65,0x00,0x00,0x00] +// GFX1012: s_atomic_swap s5, s[2:3], s101 ; encoding: [0x41,0x01,0x00,0xf6,0x00,0x00,0x00,0xca] // NOSICIVI: error: s_atomic_swap_x2 s[10:11], s[2:3], s0 glc // GFX9: s_atomic_swap_x2 s[10:11], s[2:3], s0 glc ; encoding: [0x81,0x02,0x81,0xc2,0x00,0x00,0x00,0x00] +// GFX1012: s_atomic_swap_x2 s[10:11], s[2:3], s0 glc ; encoding: [0x81,0x02,0x81,0xf6,0x00,0x00,0x00,0x00] // NOSICIVI: error: s_atomic_umax s5, s[2:3], s0 glc // GFX9: s_atomic_umax s5, s[2:3], s0 glc ; encoding: [0x41,0x01,0x1d,0xc2,0x00,0x00,0x00,0x00] +// GFX1012: s_atomic_umax s5, s[2:3], s0 glc ; encoding: [0x41,0x01,0x1d,0xf6,0x00,0x00,0x00,0x00] // NOSICIVI: error: s_atomic_umax_x2 s[10:11], s[2:3], s101 // GFX9: s_atomic_umax_x2 s[10:11], s[2:3], s101 ; encoding: [0x81,0x02,0x9c,0xc2,0x65,0x00,0x00,0x00] +// GFX1012: s_atomic_umax_x2 s[10:11], s[2:3], s101 ; encoding: [0x81,0x02,0x9c,0xf6,0x00,0x00,0x00,0xca] // NOSICIVI: error: s_atomic_umin s5, s[2:3], s101 // GFX9: s_atomic_umin s5, s[2:3], s101 ; encoding: [0x41,0x01,0x14,0xc2,0x65,0x00,0x00,0x00] +// GFX1012: s_atomic_umin s5, s[2:3], s101 ; encoding: [0x41,0x01,0x14,0xf6,0x00,0x00,0x00,0xca] // NOSICIVI: error: s_atomic_umin_x2 s[10:11], s[2:3], s0 glc // GFX9: s_atomic_umin_x2 s[10:11], s[2:3], s0 glc ; encoding: [0x81,0x02,0x95,0xc2,0x00,0x00,0x00,0x00] +// GFX1012: s_atomic_umin_x2 s[10:11], s[2:3], s0 glc ; encoding: [0x81,0x02,0x95,0xf6,0x00,0x00,0x00,0x00] // NOSICIVI: error: s_atomic_xor s5, s[2:3], s101 // GFX9: s_atomic_xor s5, s[2:3], s101 ; encoding: [0x41,0x01,0x28,0xc2,0x65,0x00,0x00,0x00] +// GFX1012: s_atomic_xor s5, s[2:3], s101 ; encoding: [0x41,0x01,0x28,0xf6,0x00,0x00,0x00,0xca] // NOSICIVI: error: s_atomic_xor_x2 s[10:11], s[2:3], s0 glc // GFX9: s_atomic_xor_x2 s[10:11], s[2:3], s0 glc ; encoding: [0x81,0x02,0xa9,0xc2,0x00,0x00,0x00,0x00] +// GFX1012: s_atomic_xor_x2 s[10:11], s[2:3], s0 glc ; encoding: [0x81,0x02,0xa9,0xf6,0x00,0x00,0x00,0x00] // NOSICIVI: error: //===----------------------------------------------------------------------===// @@ -387,128 +451,160 @@ s_buffer_atomic_add s5, s[4:7], s101 // GFX9: s_buffer_atomic_add s5, s[4:7], s101 ; encoding: [0x42,0x01,0x08,0xc1,0x65,0x00,0x00,0x00] +// GFX1012: s_buffer_atomic_add s5, s[4:7], s101 ; encoding: [0x42,0x01,0x08,0xf5,0x00,0x00,0x00,0xca] // NOSICIVI: error: s_buffer_atomic_add s5, s[4:7], 0x0 // GFX9: s_buffer_atomic_add s5, s[4:7], 0x0 ; encoding: [0x42,0x01,0x0a,0xc1,0x00,0x00,0x00,0x00] +// GFX1012: s_buffer_atomic_add s5, s[4:7], 0x0 ; encoding: [0x42,0x01,0x08,0xf5,0x00,0x00,0x00,0xfa] // NOSICIVI: error: s_buffer_atomic_add s5, s[4:7], s0 glc // GFX9: s_buffer_atomic_add s5, s[4:7], s0 glc ; encoding: [0x42,0x01,0x09,0xc1,0x00,0x00,0x00,0x00] +// GFX1012: s_buffer_atomic_add s5, s[4:7], s0 glc ; encoding: [0x42,0x01,0x09,0xf5,0x00,0x00,0x00,0x00] // NOSICIVI: error: s_buffer_atomic_add_x2 s[10:11], s[4:7], s0 // GFX9: s_buffer_atomic_add_x2 s[10:11], s[4:7], s0 ; encoding: [0x82,0x02,0x88,0xc1,0x00,0x00,0x00,0x00] +// GFX1012: s_buffer_atomic_add_x2 s[10:11], s[4:7], s0 ; encoding: [0x82,0x02,0x88,0xf5,0x00,0x00,0x00,0x00] // NOSICIVI: error: s_buffer_atomic_and s101, s[4:7], s0 // GFX9: s_buffer_atomic_and s101, s[4:7], s0 ; encoding: [0x42,0x19,0x20,0xc1,0x00,0x00,0x00,0x00] +// GFX1012: s_buffer_atomic_and s101, s[4:7], s0 ; encoding: [0x42,0x19,0x20,0xf5,0x00,0x00,0x00,0x00] // NOSICIVI: error: s_buffer_atomic_and_x2 s[10:11], s[8:11], s0 // GFX9: s_buffer_atomic_and_x2 s[10:11], s[8:11], s0 ; encoding: [0x84,0x02,0xa0,0xc1,0x00,0x00,0x00,0x00] +// GFX1012: s_buffer_atomic_and_x2 s[10:11], s[8:11], s0 ; encoding: [0x84,0x02,0xa0,0xf5,0x00,0x00,0x00,0x00] // NOSICIVI: error: s_buffer_atomic_cmpswap s[10:11], s[4:7], s0 // GFX9: s_buffer_atomic_cmpswap s[10:11], s[4:7], s0 ; encoding: [0x82,0x02,0x04,0xc1,0x00,0x00,0x00,0x00] +// GFX1012: s_buffer_atomic_cmpswap s[10:11], s[4:7], s0 ; encoding: [0x82,0x02,0x04,0xf5,0x00,0x00,0x00,0x00] // NOSICIVI: error: s_buffer_atomic_cmpswap s[10:11], s[4:7], 0x0 // GFX9: s_buffer_atomic_cmpswap s[10:11], s[4:7], 0x0 ; encoding: [0x82,0x02,0x06,0xc1,0x00,0x00,0x00,0x00] +// GFX1012: s_buffer_atomic_cmpswap s[10:11], s[4:7], 0x0 ; encoding: [0x82,0x02,0x04,0xf5,0x00,0x00,0x00,0xfa] // NOSICIVI: error: s_buffer_atomic_cmpswap s[10:11], s[4:7], s0 glc // GFX9: s_buffer_atomic_cmpswap s[10:11], s[4:7], s0 glc ; encoding: [0x82,0x02,0x05,0xc1,0x00,0x00,0x00,0x00] +// GFX1012: s_buffer_atomic_cmpswap s[10:11], s[4:7], s0 glc ; encoding: [0x82,0x02,0x05,0xf5,0x00,0x00,0x00,0x00] // NOSICIVI: error: s_buffer_atomic_cmpswap_x2 s[20:23], s[4:7], s101 // GFX9: s_buffer_atomic_cmpswap_x2 s[20:23], s[4:7], s101 ; encoding: [0x02,0x05,0x84,0xc1,0x65,0x00,0x00,0x00] +// GFX1012: s_buffer_atomic_cmpswap_x2 s[20:23], s[4:7], s101 ; encoding: [0x02,0x05,0x84,0xf5,0x00,0x00,0x00,0xca] // NOSICIVI: error: s_buffer_atomic_cmpswap_x2 s[20:23], s[4:7], 0x0 // GFX9: s_buffer_atomic_cmpswap_x2 s[20:23], s[4:7], 0x0 ; encoding: [0x02,0x05,0x86,0xc1,0x00,0x00,0x00,0x00] +// GFX1012: s_buffer_atomic_cmpswap_x2 s[20:23], s[4:7], 0x0 ; encoding: [0x02,0x05,0x84,0xf5,0x00,0x00,0x00,0xfa] // NOSICIVI: error: s_buffer_atomic_cmpswap_x2 s[20:23], s[4:7], s0 glc // GFX9: s_buffer_atomic_cmpswap_x2 s[20:23], s[4:7], s0 glc ; encoding: [0x02,0x05,0x85,0xc1,0x00,0x00,0x00,0x00] +// GFX1012: s_buffer_atomic_cmpswap_x2 s[20:23], s[4:7], s0 glc ; encoding: [0x02,0x05,0x85,0xf5,0x00,0x00,0x00,0x00] // NOSICIVI: error: s_buffer_atomic_dec s5, s[4:7], s0 // GFX9: s_buffer_atomic_dec s5, s[4:7], s0 ; encoding: [0x42,0x01,0x30,0xc1,0x00,0x00,0x00,0x00] +// GFX1012: s_buffer_atomic_dec s5, s[4:7], s0 ; encoding: [0x42,0x01,0x30,0xf5,0x00,0x00,0x00,0x00] // NOSICIVI: error: s_buffer_atomic_dec_x2 s[10:11], s[4:7], s0 glc // GFX9: s_buffer_atomic_dec_x2 s[10:11], s[4:7], s0 glc ; encoding: [0x82,0x02,0xb1,0xc1,0x00,0x00,0x00,0x00] +// GFX1012: s_buffer_atomic_dec_x2 s[10:11], s[4:7], s0 glc ; encoding: [0x82,0x02,0xb1,0xf5,0x00,0x00,0x00,0x00] // NOSICIVI: error: s_buffer_atomic_inc s101, s[4:7], s0 // GFX9: s_buffer_atomic_inc s101, s[4:7], s0 ; encoding: [0x42,0x19,0x2c,0xc1,0x00,0x00,0x00,0x00] +// GFX1012: s_buffer_atomic_inc s101, s[4:7], s0 ; encoding: [0x42,0x19,0x2c,0xf5,0x00,0x00,0x00,0x00] // NOSICIVI: error: s_buffer_atomic_inc_x2 s[10:11], s[4:7], 0x0 // GFX9: s_buffer_atomic_inc_x2 s[10:11], s[4:7], 0x0 ; encoding: [0x82,0x02,0xae,0xc1,0x00,0x00,0x00,0x00] +// GFX1012: s_buffer_atomic_inc_x2 s[10:11], s[4:7], 0x0 ; encoding: [0x82,0x02,0xac,0xf5,0x00,0x00,0x00,0xfa] // NOSICIVI: error: s_buffer_atomic_or s5, s[8:11], s0 // GFX9: s_buffer_atomic_or s5, s[8:11], s0 ; encoding: [0x44,0x01,0x24,0xc1,0x00,0x00,0x00,0x00] +// GFX1012: s_buffer_atomic_or s5, s[8:11], s0 ; encoding: [0x44,0x01,0x24,0xf5,0x00,0x00,0x00,0x00] // NOSICIVI: error: s_buffer_atomic_or_x2 s[10:11], s[96:99], s0 // GFX9: s_buffer_atomic_or_x2 s[10:11], s[96:99], s0 ; encoding: [0xb0,0x02,0xa4,0xc1,0x00,0x00,0x00,0x00] +// GFX1012: s_buffer_atomic_or_x2 s[10:11], s[96:99], s0 ; encoding: [0xb0,0x02,0xa4,0xf5,0x00,0x00,0x00,0x00] // NOSICIVI: error: s_buffer_atomic_smax s5, s[4:7], s101 // GFX9: s_buffer_atomic_smax s5, s[4:7], s101 ; encoding: [0x42,0x01,0x18,0xc1,0x65,0x00,0x00,0x00] +// GFX1012: s_buffer_atomic_smax s5, s[4:7], s101 ; encoding: [0x42,0x01,0x18,0xf5,0x00,0x00,0x00,0xca] // NOSICIVI: error: s_buffer_atomic_smax_x2 s[100:101], s[4:7], s0 // GFX9: s_buffer_atomic_smax_x2 s[100:101], s[4:7], s0 ; encoding: [0x02,0x19,0x98,0xc1,0x00,0x00,0x00,0x00] +// GFX1012: s_buffer_atomic_smax_x2 s[100:101], s[4:7], s0 ; encoding: [0x02,0x19,0x98,0xf5,0x00,0x00,0x00,0x00] // NOSICIVI: error: s_buffer_atomic_smin s5, s[4:7], 0x0 // GFX9: s_buffer_atomic_smin s5, s[4:7], 0x0 ; encoding: [0x42,0x01,0x12,0xc1,0x00,0x00,0x00,0x00] +// GFX1012: s_buffer_atomic_smin s5, s[4:7], 0x0 ; encoding: [0x42,0x01,0x10,0xf5,0x00,0x00,0x00,0xfa] // NOSICIVI: error: s_buffer_atomic_smin_x2 s[12:13], s[4:7], s0 // GFX9: s_buffer_atomic_smin_x2 s[12:13], s[4:7], s0 ; encoding: [0x02,0x03,0x90,0xc1,0x00,0x00,0x00,0x00] +// GFX1012: s_buffer_atomic_smin_x2 s[12:13], s[4:7], s0 ; encoding: [0x02,0x03,0x90,0xf5,0x00,0x00,0x00,0x00] // NOSICIVI: error: s_buffer_atomic_sub s5, s[4:7], s0 glc // GFX9: s_buffer_atomic_sub s5, s[4:7], s0 glc ; encoding: [0x42,0x01,0x0d,0xc1,0x00,0x00,0x00,0x00] +// GFX1012: s_buffer_atomic_sub s5, s[4:7], s0 glc ; encoding: [0x42,0x01,0x0d,0xf5,0x00,0x00,0x00,0x00] // NOSICIVI: error: s_buffer_atomic_sub_x2 s[10:11], s[4:7], s0 // GFX9: s_buffer_atomic_sub_x2 s[10:11], s[4:7], s0 ; encoding: [0x82,0x02,0x8c,0xc1,0x00,0x00,0x00,0x00] +// GFX1012: s_buffer_atomic_sub_x2 s[10:11], s[4:7], s0 ; encoding: [0x82,0x02,0x8c,0xf5,0x00,0x00,0x00,0x00] // NOSICIVI: error: s_buffer_atomic_swap s5, s[4:7], s0 // GFX9: s_buffer_atomic_swap s5, s[4:7], s0 ; encoding: [0x42,0x01,0x00,0xc1,0x00,0x00,0x00,0x00] +// GFX1012: s_buffer_atomic_swap s5, s[4:7], s0 ; encoding: [0x42,0x01,0x00,0xf5,0x00,0x00,0x00,0x00] // NOSICIVI: error: s_buffer_atomic_swap_x2 s[10:11], s[4:7], s0 glc // GFX9: s_buffer_atomic_swap_x2 s[10:11], s[4:7], s0 glc ; encoding: [0x82,0x02,0x81,0xc1,0x00,0x00,0x00,0x00] +// GFX1012: s_buffer_atomic_swap_x2 s[10:11], s[4:7], s0 glc ; encoding: [0x82,0x02,0x81,0xf5,0x00,0x00,0x00,0x00] // NOSICIVI: error: s_buffer_atomic_umax s5, s[4:7], s0 // GFX9: s_buffer_atomic_umax s5, s[4:7], s0 ; encoding: [0x42,0x01,0x1c,0xc1,0x00,0x00,0x00,0x00] +// GFX1012: s_buffer_atomic_umax s5, s[4:7], s0 ; encoding: [0x42,0x01,0x1c,0xf5,0x00,0x00,0x00,0x00] // NOSICIVI: error: s_buffer_atomic_umax_x2 s[10:11], s[4:7], s0 glc // GFX9: s_buffer_atomic_umax_x2 s[10:11], s[4:7], s0 glc ; encoding: [0x82,0x02,0x9d,0xc1,0x00,0x00,0x00,0x00] +// GFX1012: s_buffer_atomic_umax_x2 s[10:11], s[4:7], s0 glc ; encoding: [0x82,0x02,0x9d,0xf5,0x00,0x00,0x00,0x00] // NOSICIVI: error: s_buffer_atomic_umin s5, s[4:7], s0 // GFX9: s_buffer_atomic_umin s5, s[4:7], s0 ; encoding: [0x42,0x01,0x14,0xc1,0x00,0x00,0x00,0x00] +// GFX1012: s_buffer_atomic_umin s5, s[4:7], s0 ; encoding: [0x42,0x01,0x14,0xf5,0x00,0x00,0x00,0x00] // NOSICIVI: error: s_buffer_atomic_umin_x2 s[10:11], s[4:7], s0 glc // GFX9: s_buffer_atomic_umin_x2 s[10:11], s[4:7], s0 glc ; encoding: [0x82,0x02,0x95,0xc1,0x00,0x00,0x00,0x00] +// GFX1012: s_buffer_atomic_umin_x2 s[10:11], s[4:7], s0 glc ; encoding: [0x82,0x02,0x95,0xf5,0x00,0x00,0x00,0x00] // NOSICIVI: error: s_buffer_atomic_xor s5, s[4:7], s0 // GFX9: s_buffer_atomic_xor s5, s[4:7], s0 ; encoding: [0x42,0x01,0x28,0xc1,0x00,0x00,0x00,0x00] +// GFX1012: s_buffer_atomic_xor s5, s[4:7], s0 ; encoding: [0x42,0x01,0x28,0xf5,0x00,0x00,0x00,0x00] // NOSICIVI: error: s_buffer_atomic_xor_x2 s[10:11], s[4:7], s0 glc // GFX9: s_buffer_atomic_xor_x2 s[10:11], s[4:7], s0 glc ; encoding: [0x82,0x02,0xa9,0xc1,0x00,0x00,0x00,0x00] +// GFX1012: s_buffer_atomic_xor_x2 s[10:11], s[4:7], s0 glc ; encoding: [0x82,0x02,0xa9,0xf5,0x00,0x00,0x00,0x00] // NOSICIVI: error: Index: test/MC/AMDGPU/xdl-insts-gfx1011-gfx1012.s =================================================================== --- /dev/null +++ test/MC/AMDGPU/xdl-insts-gfx1011-gfx1012.s @@ -0,0 +1,158 @@ +// RUN: llvm-mc -arch=amdgcn -mcpu=gfx1011 -show-encoding %s | FileCheck %s +// RUN: llvm-mc -arch=amdgcn -mcpu=gfx1012 -show-encoding %s | FileCheck %s + +v_dot2c_f32_f16_e32 v5, v1, v2 +// CHECK: encoding: [0x01,0x05,0x0a,0x04] + +v_dot2c_f32_f16_e32 v255, v1, v2 +// CHECK: encoding: [0x01,0x05,0xfe,0x05] + +v_dot2c_f32_f16_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 +// CHECK: encoding: [0xfa,0x04,0x0a,0x04,0x01,0xe4,0x00,0x00] + +v_dot2c_f32_f16_dpp v255, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 +// CHECK: encoding: [0xfa,0x04,0xfe,0x05,0x01,0xe4,0x00,0x00] + +v_dot2c_f32_f16_dpp v5, v255, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 +// CHECK: encoding: [0xfa,0x04,0x0a,0x04,0xff,0xe4,0x00,0x00] + +v_dot2c_f32_f16_dpp v5, v1, v255 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 +// CHECK: encoding: [0xfa,0xfe,0x0b,0x04,0x01,0xe4,0x00,0x00] + +v_dot2c_f32_f16_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 +// CHECK: encoding: [0xfa,0x04,0x0a,0x04,0x01,0x1b,0x00,0x00] + +v_dot2c_f32_f16_dpp v5, v1, v2 row_mirror row_mask:0x0 bank_mask:0x0 +// CHECK: encoding: [0xfa,0x04,0x0a,0x04,0x01,0x40,0x01,0x00] + +v_dot2c_f32_f16_dpp v5, v1, v2 row_half_mirror row_mask:0x0 bank_mask:0x0 +// CHECK: encoding: [0xfa,0x04,0x0a,0x04,0x01,0x41,0x01,0x00] + +v_dot2c_f32_f16_dpp v5, v1, v2 row_shl:1 row_mask:0x0 bank_mask:0x0 +// CHECK: encoding: [0xfa,0x04,0x0a,0x04,0x01,0x01,0x01,0x00] + +v_dot2c_f32_f16_dpp v5, v1, v2 row_shl:15 row_mask:0x0 bank_mask:0x0 +// CHECK: encoding: [0xfa,0x04,0x0a,0x04,0x01,0x0f,0x01,0x00] + +v_dot2c_f32_f16_dpp v5, v1, v2 row_shr:1 row_mask:0x0 bank_mask:0x0 +// CHECK: encoding: [0xfa,0x04,0x0a,0x04,0x01,0x11,0x01,0x00] + +v_dot2c_f32_f16_dpp v5, v1, v2 row_shr:15 row_mask:0x0 bank_mask:0x0 +// CHECK: encoding: [0xfa,0x04,0x0a,0x04,0x01,0x1f,0x01,0x00] + +v_dot2c_f32_f16_dpp v5, v1, v2 row_ror:1 row_mask:0x0 bank_mask:0x0 +// CHECK: encoding: [0xfa,0x04,0x0a,0x04,0x01,0x21,0x01,0x00] + +v_dot2c_f32_f16_dpp v5, v1, v2 row_ror:15 row_mask:0x0 bank_mask:0x0 +// CHECK: encoding: [0xfa,0x04,0x0a,0x04,0x01,0x2f,0x01,0x00] + +v_dot2c_f32_f16_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x1 bank_mask:0x0 +// CHECK: encoding: [0xfa,0x04,0x0a,0x04,0x01,0xe4,0x00,0x10] + +v_dot2c_f32_f16_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x3 bank_mask:0x0 +// CHECK: encoding: [0xfa,0x04,0x0a,0x04,0x01,0xe4,0x00,0x30] + +v_dot2c_f32_f16_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0x0 +// CHECK: encoding: [0xfa,0x04,0x0a,0x04,0x01,0xe4,0x00,0xf0] + +v_dot2c_f32_f16_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0x0 +// CHECK: encoding: [0xfa,0x04,0x0a,0x04,0x01,0xe4,0x00,0xf0] + +v_dot2c_f32_f16_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x1 +// CHECK: encoding: [0xfa,0x04,0x0a,0x04,0x01,0xe4,0x00,0x01] + +v_dot2c_f32_f16_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x3 +// CHECK: encoding: [0xfa,0x04,0x0a,0x04,0x01,0xe4,0x00,0x03] + +v_dot2c_f32_f16_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0xf +// CHECK: encoding: [0xfa,0x04,0x0a,0x04,0x01,0xe4,0x00,0x0f] + +v_dot2c_f32_f16_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0xf +// CHECK: encoding: [0xfa,0x04,0x0a,0x04,0x01,0xe4,0x00,0x0f] + +v_dot2c_f32_f16_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 bound_ctrl:0 +// CHECK: encoding: [0xfa,0x04,0x0a,0x04,0x01,0xe4,0x08,0x00] + +v_dot2c_f32_f16_dpp v5, -v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 +// CHECK: encoding: [0xfa,0x04,0x0a,0x04,0x01,0xe4,0x10,0x00] + +v_dot2c_f32_f16_dpp v5, |v1|, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 +// CHECK: encoding: [0xfa,0x04,0x0a,0x04,0x01,0xe4,0x20,0x00] + +v_dot2c_f32_f16_dpp v5, v1, -v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 +// CHECK: encoding: [0xfa,0x04,0x0a,0x04,0x01,0xe4,0x40,0x00] + +v_dot2c_f32_f16_dpp v5, v1, |v2| quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 +// CHECK: encoding: [0xfa,0x04,0x0a,0x04,0x01,0xe4,0x80,0x00] + +v_dot4c_i32_i8_e32 v5, v1, v2 +// CHECK: encoding: [0x01,0x05,0x0a,0x1a] + +v_dot4c_i32_i8_e32 v255, v1, v2 +// CHECK: encoding: [0x01,0x05,0xfe,0x1b] + +v_dot4c_i32_i8_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 +// CHECK: encoding: [0xfa,0x04,0x0a,0x1a,0x01,0xe4,0x00,0x00] + +v_dot4c_i32_i8_dpp v255, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 +// CHECK: encoding: [0xfa,0x04,0xfe,0x1b,0x01,0xe4,0x00,0x00] + +v_dot4c_i32_i8_dpp v5, v255, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 +// CHECK: encoding: [0xfa,0x04,0x0a,0x1a,0xff,0xe4,0x00,0x00] + +v_dot4c_i32_i8_dpp v5, v1, v255 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 +// CHECK: encoding: [0xfa,0xfe,0x0b,0x1a,0x01,0xe4,0x00,0x00] + +v_dot4c_i32_i8_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 +// CHECK: encoding: [0xfa,0x04,0x0a,0x1a,0x01,0x1b,0x00,0x00] + +v_dot4c_i32_i8_dpp v5, v1, v2 row_mirror row_mask:0x0 bank_mask:0x0 +// CHECK: encoding: [0xfa,0x04,0x0a,0x1a,0x01,0x40,0x01,0x00] + +v_dot4c_i32_i8_dpp v5, v1, v2 row_half_mirror row_mask:0x0 bank_mask:0x0 +// CHECK: encoding: [0xfa,0x04,0x0a,0x1a,0x01,0x41,0x01,0x00] + +v_dot4c_i32_i8_dpp v5, v1, v2 row_shl:1 row_mask:0x0 bank_mask:0x0 +// CHECK: encoding: [0xfa,0x04,0x0a,0x1a,0x01,0x01,0x01,0x00] + +v_dot4c_i32_i8_dpp v5, v1, v2 row_shl:15 row_mask:0x0 bank_mask:0x0 +// CHECK: encoding: [0xfa,0x04,0x0a,0x1a,0x01,0x0f,0x01,0x00] + +v_dot4c_i32_i8_dpp v5, v1, v2 row_shr:1 row_mask:0x0 bank_mask:0x0 +// CHECK: encoding: [0xfa,0x04,0x0a,0x1a,0x01,0x11,0x01,0x00] + +v_dot4c_i32_i8_dpp v5, v1, v2 row_shr:15 row_mask:0x0 bank_mask:0x0 +// CHECK: encoding: [0xfa,0x04,0x0a,0x1a,0x01,0x1f,0x01,0x00] + +v_dot4c_i32_i8_dpp v5, v1, v2 row_ror:1 row_mask:0x0 bank_mask:0x0 +// CHECK: encoding: [0xfa,0x04,0x0a,0x1a,0x01,0x21,0x01,0x00] + +v_dot4c_i32_i8_dpp v5, v1, v2 row_ror:15 row_mask:0x0 bank_mask:0x0 +// CHECK: encoding: [0xfa,0x04,0x0a,0x1a,0x01,0x2f,0x01,0x00] + +v_dot4c_i32_i8_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x1 bank_mask:0x0 +// CHECK: encoding: [0xfa,0x04,0x0a,0x1a,0x01,0xe4,0x00,0x10] + +v_dot4c_i32_i8_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x3 bank_mask:0x0 +// CHECK: encoding: [0xfa,0x04,0x0a,0x1a,0x01,0xe4,0x00,0x30] + +v_dot4c_i32_i8_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0x0 +// CHECK: encoding: [0xfa,0x04,0x0a,0x1a,0x01,0xe4,0x00,0xf0] + +v_dot4c_i32_i8_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0x0 +// CHECK: encoding: [0xfa,0x04,0x0a,0x1a,0x01,0xe4,0x00,0xf0] + +v_dot4c_i32_i8_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x1 +// CHECK: encoding: [0xfa,0x04,0x0a,0x1a,0x01,0xe4,0x00,0x01] + +v_dot4c_i32_i8_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x3 +// CHECK: encoding: [0xfa,0x04,0x0a,0x1a,0x01,0xe4,0x00,0x03] + +v_dot4c_i32_i8_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0xf +// CHECK: encoding: [0xfa,0x04,0x0a,0x1a,0x01,0xe4,0x00,0x0f] + +v_dot4c_i32_i8_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0xf +// CHECK: encoding: [0xfa,0x04,0x0a,0x1a,0x01,0xe4,0x00,0x0f] + +v_dot4c_i32_i8_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 bound_ctrl:0 +// CHECK: encoding: [0xfa,0x04,0x0a,0x1a,0x01,0xe4,0x08,0x00] Index: test/MC/Disassembler/AMDGPU/gfx1011_dasm_dlops.txt =================================================================== --- /dev/null +++ test/MC/Disassembler/AMDGPU/gfx1011_dasm_dlops.txt @@ -0,0 +1,53 @@ +# RUN: llvm-mc -arch=amdgcn -mcpu=gfx1011 -disassemble -show-encoding < %s | FileCheck -check-prefix=GFX10 %s +# RUN: llvm-mc -arch=amdgcn -mcpu=gfx1012 -disassemble -show-encoding < %s | FileCheck -check-prefix=GFX10 %s + +# GFX10: v_dot2_f32_f16 v0, v1, v2, v3 ; encoding: [0x00,0x40,0x13,0xcc,0x01,0x05,0x0e,0x1c] +0x00,0x40,0x13,0xcc,0x01,0x05,0x0e,0x1c + +# GFX10: v_dot2_i32_i16 v0, v1, v2, v3 ; encoding: [0x00,0x40,0x14,0xcc,0x01,0x05,0x0e,0x1c] +0x00,0x40,0x14,0xcc,0x01,0x05,0x0e,0x1c + +# GFX10: v_dot2_u32_u16 v0, v1, v2, v3 ; encoding: [0x00,0x40,0x15,0xcc,0x01,0x05,0x0e,0x1c] +0x00,0x40,0x15,0xcc,0x01,0x05,0x0e,0x1c + +# GFX10: v_dot4_i32_i8 v0, v1, v2, v3 ; encoding: [0x00,0x40,0x16,0xcc,0x01,0x05,0x0e,0x1c] +0x00,0x40,0x16,0xcc,0x01,0x05,0x0e,0x1c + +# GFX10: v_dot4_u32_u8 v0, v1, v2, v3 ; encoding: [0x00,0x40,0x17,0xcc,0x01,0x05,0x0e,0x1c] +0x00,0x40,0x17,0xcc,0x01,0x05,0x0e,0x1c + +# GFX10: v_dot8_i32_i4 v0, v1, v2, v3 ; encoding: [0x00,0x40,0x18,0xcc,0x01,0x05,0x0e,0x1c] +0x00,0x40,0x18,0xcc,0x01,0x05,0x0e,0x1c + +# GFX10: v_dot8_u32_u4 v0, v1, v2, v3 ; encoding: [0x00,0x40,0x19,0xcc,0x01,0x05,0x0e,0x1c] +0x00,0x40,0x19,0xcc,0x01,0x05,0x0e,0x1c + +# GFX10: v_dot2c_f32_f16_e32 v5, v1, v2 +0x01,0x05,0x0a,0x04 + +# GFX10: v_dot2c_f32_f16_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 +0xfa,0x04,0x0a,0x04,0x01,0xe4,0x00,0x00 + +# GFX10: v_dot2c_f32_f16_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 +0xfa,0x04,0x0a,0x04,0x01,0xe4,0x04,0x00 + +# GFX10: v_dot2c_f32_f16_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +0xe9,0x04,0x0a,0x04,0x01,0x77,0x39,0x05 + +# GFX10: v_dot2c_f32_f16_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +0xea,0x04,0x0a,0x04,0x01,0x77,0x39,0x05 + +# GFX10: v_dot4c_i32_i8_e32 v5, v1, v2 +0x01,0x05,0x0a,0x1a + +# GFX10: v_dot4c_i32_i8_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 +0xfa,0x04,0x0a,0x1a,0x01,0xe4,0x00,0x00 + +# GFX10: v_dot4c_i32_i8_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 +0xfa,0x04,0x0a,0x1a,0x01,0xe4,0x04,0x00 + +# GFX10: v_dot4c_i32_i8_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +0xe9,0x04,0x0a,0x1a,0x01,0x77,0x39,0x05 + +# GFX10: v_dot4c_i32_i8_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +0xea,0x04,0x0a,0x1a,0x01,0x77,0x39,0x05 Index: test/MC/Disassembler/AMDGPU/xdl-insts-gfx1011-gfx1012.txt =================================================================== --- /dev/null +++ test/MC/Disassembler/AMDGPU/xdl-insts-gfx1011-gfx1012.txt @@ -0,0 +1,158 @@ +# RUN: llvm-mc -arch=amdgcn -mcpu=gfx1011 -disassemble -show-encoding < %s | FileCheck %s +# RUN: llvm-mc -arch=amdgcn -mcpu=gfx1012 -disassemble -show-encoding < %s | FileCheck %s + +# CHECK: v_dot2c_f32_f16_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x04] +0x01,0x05,0x0a,0x04 + +# CHECK: v_dot2c_f32_f16_e32 v255, v1, v2 ; encoding: [0x01,0x05,0xfe,0x05] +0x01,0x05,0xfe,0x05 + +# CHECK: v_dot2c_f32_f16_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x04,0x01,0xe4,0x00,0x00] +0xfa,0x04,0x0a,0x04,0x01,0xe4,0x00,0x00 + +# CHECK: v_dot2c_f32_f16_dpp v255, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0xfe,0x05,0x01,0xe4,0x00,0x00] +0xfa,0x04,0xfe,0x05,0x01,0xe4,0x00,0x00 + +# CHECK: v_dot2c_f32_f16_dpp v5, v255, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x04,0xff,0xe4,0x00,0x00] +0xfa,0x04,0x0a,0x04,0xff,0xe4,0x00,0x00 + +# CHECK: v_dot2c_f32_f16_dpp v5, v1, v255 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0xfe,0x0b,0x04,0x01,0xe4,0x00,0x00] +0xfa,0xfe,0x0b,0x04,0x01,0xe4,0x00,0x00 + +# CHECK: v_dot2c_f32_f16_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x04,0x01,0x1b,0x00,0x00] +0xfa,0x04,0x0a,0x04,0x01,0x1b,0x00,0x00 + +# CHECK: v_dot2c_f32_f16_dpp v5, v1, v2 row_mirror row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x04,0x01,0x40,0x01,0x00] +0xfa,0x04,0x0a,0x04,0x01,0x40,0x01,0x00 + +# CHECK: v_dot2c_f32_f16_dpp v5, v1, v2 row_half_mirror row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x04,0x01,0x41,0x01,0x00] +0xfa,0x04,0x0a,0x04,0x01,0x41,0x01,0x00 + +# CHECK: v_dot2c_f32_f16_dpp v5, v1, v2 row_shl:1 row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x04,0x01,0x01,0x01,0x00] +0xfa,0x04,0x0a,0x04,0x01,0x01,0x01,0x00 + +# CHECK: v_dot2c_f32_f16_dpp v5, v1, v2 row_shl:15 row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x04,0x01,0x0f,0x01,0x00] +0xfa,0x04,0x0a,0x04,0x01,0x0f,0x01,0x00 + +# CHECK: v_dot2c_f32_f16_dpp v5, v1, v2 row_shr:1 row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x04,0x01,0x11,0x01,0x00] +0xfa,0x04,0x0a,0x04,0x01,0x11,0x01,0x00 + +# CHECK: v_dot2c_f32_f16_dpp v5, v1, v2 row_shr:15 row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x04,0x01,0x1f,0x01,0x00] +0xfa,0x04,0x0a,0x04,0x01,0x1f,0x01,0x00 + +# CHECK: v_dot2c_f32_f16_dpp v5, v1, v2 row_ror:1 row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x04,0x01,0x21,0x01,0x00] +0xfa,0x04,0x0a,0x04,0x01,0x21,0x01,0x00 + +# CHECK: v_dot2c_f32_f16_dpp v5, v1, v2 row_ror:15 row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x04,0x01,0x2f,0x01,0x00] +0xfa,0x04,0x0a,0x04,0x01,0x2f,0x01,0x00 + +# CHECK: v_dot2c_f32_f16_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x1 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x04,0x01,0xe4,0x00,0x10] +0xfa,0x04,0x0a,0x04,0x01,0xe4,0x00,0x10 + +# CHECK: v_dot2c_f32_f16_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x3 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x04,0x01,0xe4,0x00,0x30] +0xfa,0x04,0x0a,0x04,0x01,0xe4,0x00,0x30 + +# CHECK: v_dot2c_f32_f16_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x04,0x01,0xe4,0x00,0xf0] +0xfa,0x04,0x0a,0x04,0x01,0xe4,0x00,0xf0 + +# CHECK: v_dot2c_f32_f16_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x04,0x01,0xe4,0x00,0xf0] +0xfa,0x04,0x0a,0x04,0x01,0xe4,0x00,0xf0 + +# CHECK: v_dot2c_f32_f16_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x04,0x01,0xe4,0x00,0x01] +0xfa,0x04,0x0a,0x04,0x01,0xe4,0x00,0x01 + +# CHECK: v_dot2c_f32_f16_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x04,0x01,0xe4,0x00,0x03] +0xfa,0x04,0x0a,0x04,0x01,0xe4,0x00,0x03 + +# CHECK: v_dot2c_f32_f16_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x04,0x01,0xe4,0x00,0x0f] +0xfa,0x04,0x0a,0x04,0x01,0xe4,0x00,0x0f + +# CHECK: v_dot2c_f32_f16_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x04,0x01,0xe4,0x00,0x0f] +0xfa,0x04,0x0a,0x04,0x01,0xe4,0x00,0x0f + +# CHECK: v_dot2c_f32_f16_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 bound_ctrl:0 ; encoding: [0xfa,0x04,0x0a,0x04,0x01,0xe4,0x08,0x00] +0xfa,0x04,0x0a,0x04,0x01,0xe4,0x08,0x00 + +# CHECK: v_dot2c_f32_f16_dpp v5, -v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x04,0x01,0xe4,0x10,0x00] +0xfa,0x04,0x0a,0x04,0x01,0xe4,0x10,0x00 + +# CHECK: v_dot2c_f32_f16_dpp v5, |v1|, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x04,0x01,0xe4,0x20,0x00] +0xfa,0x04,0x0a,0x04,0x01,0xe4,0x20,0x00 + +# CHECK: v_dot2c_f32_f16_dpp v5, v1, -v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x04,0x01,0xe4,0x40,0x00] +0xfa,0x04,0x0a,0x04,0x01,0xe4,0x40,0x00 + +# CHECK: v_dot2c_f32_f16_dpp v5, v1, |v2| quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x04,0x01,0xe4,0x80,0x00] +0xfa,0x04,0x0a,0x04,0x01,0xe4,0x80,0x00 + +# CHECK: v_dot4c_i32_i8_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x1a] +0x01,0x05,0x0a,0x1a + +# CHECK: v_dot4c_i32_i8_e32 v255, v1, v2 ; encoding: [0x01,0x05,0xfe,0x1b] +0x01,0x05,0xfe,0x1b + +# CHECK: v_dot4c_i32_i8_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x1a,0x01,0xe4,0x00,0x00] +0xfa,0x04,0x0a,0x1a,0x01,0xe4,0x00,0x00 + +# CHECK: v_dot4c_i32_i8_dpp v255, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0xfe,0x1b,0x01,0xe4,0x00,0x00] +0xfa,0x04,0xfe,0x1b,0x01,0xe4,0x00,0x00 + +# CHECK: v_dot4c_i32_i8_dpp v5, v255, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x1a,0xff,0xe4,0x00,0x00] +0xfa,0x04,0x0a,0x1a,0xff,0xe4,0x00,0x00 + +# CHECK: v_dot4c_i32_i8_dpp v5, v1, v255 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0xfe,0x0b,0x1a,0x01,0xe4,0x00,0x00] +0xfa,0xfe,0x0b,0x1a,0x01,0xe4,0x00,0x00 + +# CHECK: v_dot4c_i32_i8_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x1a,0x01,0x1b,0x00,0x00] +0xfa,0x04,0x0a,0x1a,0x01,0x1b,0x00,0x00 + +# CHECK: v_dot4c_i32_i8_dpp v5, v1, v2 row_mirror row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x1a,0x01,0x40,0x01,0x00] +0xfa,0x04,0x0a,0x1a,0x01,0x40,0x01,0x00 + +# CHECK: v_dot4c_i32_i8_dpp v5, v1, v2 row_half_mirror row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x1a,0x01,0x41,0x01,0x00] +0xfa,0x04,0x0a,0x1a,0x01,0x41,0x01,0x00 + +# CHECK: v_dot4c_i32_i8_dpp v5, v1, v2 row_shl:1 row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x1a,0x01,0x01,0x01,0x00] +0xfa,0x04,0x0a,0x1a,0x01,0x01,0x01,0x00 + +# CHECK: v_dot4c_i32_i8_dpp v5, v1, v2 row_shl:15 row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x1a,0x01,0x0f,0x01,0x00] +0xfa,0x04,0x0a,0x1a,0x01,0x0f,0x01,0x00 + +# CHECK: v_dot4c_i32_i8_dpp v5, v1, v2 row_shr:1 row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x1a,0x01,0x11,0x01,0x00] +0xfa,0x04,0x0a,0x1a,0x01,0x11,0x01,0x00 + +# CHECK: v_dot4c_i32_i8_dpp v5, v1, v2 row_shr:15 row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x1a,0x01,0x1f,0x01,0x00] +0xfa,0x04,0x0a,0x1a,0x01,0x1f,0x01,0x00 + +# CHECK: v_dot4c_i32_i8_dpp v5, v1, v2 row_ror:1 row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x1a,0x01,0x21,0x01,0x00] +0xfa,0x04,0x0a,0x1a,0x01,0x21,0x01,0x00 + +# CHECK: v_dot4c_i32_i8_dpp v5, v1, v2 row_ror:15 row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x1a,0x01,0x2f,0x01,0x00] +0xfa,0x04,0x0a,0x1a,0x01,0x2f,0x01,0x00 + +# CHECK: v_dot4c_i32_i8_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x1 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x1a,0x01,0xe4,0x00,0x10] +0xfa,0x04,0x0a,0x1a,0x01,0xe4,0x00,0x10 + +# CHECK: v_dot4c_i32_i8_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x3 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x1a,0x01,0xe4,0x00,0x30] +0xfa,0x04,0x0a,0x1a,0x01,0xe4,0x00,0x30 + +# CHECK: v_dot4c_i32_i8_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x1a,0x01,0xe4,0x00,0xf0] +0xfa,0x04,0x0a,0x1a,0x01,0xe4,0x00,0xf0 + +# CHECK: v_dot4c_i32_i8_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x1a,0x01,0xe4,0x00,0xf0] +0xfa,0x04,0x0a,0x1a,0x01,0xe4,0x00,0xf0 + +# CHECK: v_dot4c_i32_i8_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x1a,0x01,0xe4,0x00,0x01] +0xfa,0x04,0x0a,0x1a,0x01,0xe4,0x00,0x01 + +# CHECK: v_dot4c_i32_i8_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x1a,0x01,0xe4,0x00,0x03] +0xfa,0x04,0x0a,0x1a,0x01,0xe4,0x00,0x03 + +# CHECK: v_dot4c_i32_i8_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x1a,0x01,0xe4,0x00,0x0f] +0xfa,0x04,0x0a,0x1a,0x01,0xe4,0x00,0x0f + +# CHECK: v_dot4c_i32_i8_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x1a,0x01,0xe4,0x00,0x0f] +0xfa,0x04,0x0a,0x1a,0x01,0xe4,0x00,0x0f + +# CHECK: v_dot4c_i32_i8_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 bound_ctrl:0 ; encoding: [0xfa,0x04,0x0a,0x1a,0x01,0xe4,0x08,0x00] +0xfa,0x04,0x0a,0x1a,0x01,0xe4,0x08,0x00 Index: tools/llvm-readobj/ELFDumper.cpp =================================================================== --- tools/llvm-readobj/ELFDumper.cpp +++ tools/llvm-readobj/ELFDumper.cpp @@ -1297,6 +1297,8 @@ LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX906), LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX909), LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1010), + LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1011), + LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1012), LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_XNACK), LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_SRAM_ECC) };