Index: clang/include/clang/Basic/Cuda.h =================================================================== --- clang/include/clang/Basic/Cuda.h +++ clang/include/clang/Basic/Cuda.h @@ -93,6 +93,7 @@ GFX90c, GFX940, GFX941, + GFX942, GFX1010, GFX1011, GFX1012, Index: clang/lib/Basic/Cuda.cpp =================================================================== --- clang/lib/Basic/Cuda.cpp +++ clang/lib/Basic/Cuda.cpp @@ -115,6 +115,7 @@ GFX(90c), // gfx90c GFX(940), // gfx940 GFX(941), // gfx941 + GFX(942), // gfx942 GFX(1010), // gfx1010 GFX(1011), // gfx1011 GFX(1012), // gfx1012 Index: clang/lib/Basic/Targets/NVPTX.cpp =================================================================== --- clang/lib/Basic/Targets/NVPTX.cpp +++ clang/lib/Basic/Targets/NVPTX.cpp @@ -196,6 +196,7 @@ case CudaArch::GFX90c: case CudaArch::GFX940: case CudaArch::GFX941: + case CudaArch::GFX942: case CudaArch::GFX1010: case CudaArch::GFX1011: case CudaArch::GFX1012: Index: clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp =================================================================== --- clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp +++ clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp @@ -3581,6 +3581,7 @@ case CudaArch::GFX90c: case CudaArch::GFX940: case CudaArch::GFX941: + case CudaArch::GFX942: case CudaArch::GFX1010: case CudaArch::GFX1011: case CudaArch::GFX1012: Index: clang/test/CodeGenOpenCL/amdgpu-features.cl =================================================================== --- clang/test/CodeGenOpenCL/amdgpu-features.cl +++ clang/test/CodeGenOpenCL/amdgpu-features.cl @@ -31,6 +31,7 @@ // RUN: %clang_cc1 -triple amdgcn -target-cpu gfx90c -S -emit-llvm -o - %s | FileCheck --check-prefix=GFX90C %s // RUN: %clang_cc1 -triple amdgcn -target-cpu gfx940 -S -emit-llvm -o - %s | FileCheck --check-prefix=GFX940 %s // RUN: %clang_cc1 -triple amdgcn -target-cpu gfx941 -S -emit-llvm -o - %s | FileCheck --check-prefix=GFX941 %s +// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx942 -S -emit-llvm -o - %s | 
FileCheck --check-prefix=GFX942 %s // RUN: %clang_cc1 -triple amdgcn -target-cpu gfx1010 -S -emit-llvm -o - %s | FileCheck --check-prefix=GFX1010 %s // RUN: %clang_cc1 -triple amdgcn -target-cpu gfx1011 -S -emit-llvm -o - %s | FileCheck --check-prefix=GFX1011 %s // RUN: %clang_cc1 -triple amdgcn -target-cpu gfx1012 -S -emit-llvm -o - %s | FileCheck --check-prefix=GFX1012 %s @@ -77,6 +78,7 @@ // GFX90C: "target-features"="+16-bit-insts,+ci-insts,+dpp,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64" // GFX940: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+fp8-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64" // GFX941: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+fp8-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64" +// GFX942: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+fp8-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64" // GFX1010: 
"target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dpp,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32" // GFX1011: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32" // GFX1012: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32" Index: clang/test/Driver/amdgpu-macros.cl =================================================================== --- clang/test/Driver/amdgpu-macros.cl +++ clang/test/Driver/amdgpu-macros.cl @@ -110,6 +110,7 @@ // RUN: %clang -E -dM -target amdgcn -mcpu=gfx90c %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=64 -DCPU=gfx90c -DFAMILY=GFX9 // RUN: %clang -E -dM -target amdgcn -mcpu=gfx940 %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=64 -DCPU=gfx940 -DFAMILY=GFX9 // RUN: %clang -E -dM -target amdgcn -mcpu=gfx941 %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=64 -DCPU=gfx941 -DFAMILY=GFX9 +// RUN: %clang -E -dM -target amdgcn -mcpu=gfx942 %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=64 -DCPU=gfx942 -DFAMILY=GFX9 // RUN: %clang -E -dM -target amdgcn -mcpu=gfx1010 %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=32 -DCPU=gfx1010 -DFAMILY=GFX10 // RUN: %clang -E -dM -target amdgcn -mcpu=gfx1011 %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=32 -DCPU=gfx1011 -DFAMILY=GFX10 // RUN: %clang -E -dM -target amdgcn -mcpu=gfx1012 %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=32 -DCPU=gfx1012 -DFAMILY=GFX10 Index: clang/test/Driver/amdgpu-mcpu.cl 
=================================================================== --- clang/test/Driver/amdgpu-mcpu.cl +++ clang/test/Driver/amdgpu-mcpu.cl @@ -94,6 +94,7 @@ // RUN: %clang -### -target amdgcn -mcpu=gfx90c %s 2>&1 | FileCheck --check-prefix=GFX90C %s // RUN: %clang -### -target amdgcn -mcpu=gfx940 %s 2>&1 | FileCheck --check-prefix=GFX940 %s // RUN: %clang -### -target amdgcn -mcpu=gfx941 %s 2>&1 | FileCheck --check-prefix=GFX941 %s +// RUN: %clang -### -target amdgcn -mcpu=gfx942 %s 2>&1 | FileCheck --check-prefix=GFX942 %s // RUN: %clang -### -target amdgcn -mcpu=gfx1010 %s 2>&1 | FileCheck --check-prefix=GFX1010 %s // RUN: %clang -### -target amdgcn -mcpu=gfx1011 %s 2>&1 | FileCheck --check-prefix=GFX1011 %s // RUN: %clang -### -target amdgcn -mcpu=gfx1012 %s 2>&1 | FileCheck --check-prefix=GFX1012 %s @@ -135,6 +136,7 @@ // GFX90C: "-target-cpu" "gfx90c" // GFX940: "-target-cpu" "gfx940" // GFX941: "-target-cpu" "gfx941" +// GFX942: "-target-cpu" "gfx942" // GFX1010: "-target-cpu" "gfx1010" // GFX1011: "-target-cpu" "gfx1011" // GFX1012: "-target-cpu" "gfx1012" Index: clang/test/Misc/target-invalid-cpu-note.c =================================================================== --- clang/test/Misc/target-invalid-cpu-note.c +++ clang/test/Misc/target-invalid-cpu-note.c @@ -29,7 +29,7 @@ // RUN: not %clang_cc1 -triple nvptx--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix NVPTX // NVPTX: error: unknown target CPU 'not-a-cpu' -// NVPTX-NEXT: note: valid target CPU values are: sm_20, sm_21, sm_30, sm_32, sm_35, sm_37, sm_50, sm_52, sm_53, sm_60, sm_61, sm_62, sm_70, sm_72, sm_75, sm_80, sm_86, sm_87, sm_89, sm_90, gfx600, gfx601, gfx602, gfx700, gfx701, gfx702, gfx703, gfx704, gfx705, gfx801, gfx802, gfx803, gfx805, gfx810, gfx900, gfx902, gfx904, gfx906, gfx908, gfx909, gfx90a, gfx90c, gfx940, gfx941, gfx1010, gfx1011, gfx1012, gfx1013, gfx1030, gfx1031, gfx1032, gfx1033, gfx1034, gfx1035, gfx1036, gfx1100, gfx1101, gfx1102, gfx1103{{$}} 
+// NVPTX-NEXT: note: valid target CPU values are: sm_20, sm_21, sm_30, sm_32, sm_35, sm_37, sm_50, sm_52, sm_53, sm_60, sm_61, sm_62, sm_70, sm_72, sm_75, sm_80, sm_86, sm_87, sm_89, sm_90, gfx600, gfx601, gfx602, gfx700, gfx701, gfx702, gfx703, gfx704, gfx705, gfx801, gfx802, gfx803, gfx805, gfx810, gfx900, gfx902, gfx904, gfx906, gfx908, gfx909, gfx90a, gfx90c, gfx940, gfx941, gfx942, gfx1010, gfx1011, gfx1012, gfx1013, gfx1030, gfx1031, gfx1032, gfx1033, gfx1034, gfx1035, gfx1036, gfx1100, gfx1101, gfx1102, gfx1103{{$}} // RUN: not %clang_cc1 -triple r600--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix R600 // R600: error: unknown target CPU 'not-a-cpu' @@ -37,7 +37,7 @@ // RUN: not %clang_cc1 -triple amdgcn--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix AMDGCN // AMDGCN: error: unknown target CPU 'not-a-cpu' -// AMDGCN-NEXT: note: valid target CPU values are: gfx600, tahiti, gfx601, pitcairn, verde, gfx602, hainan, oland, gfx700, kaveri, gfx701, hawaii, gfx702, gfx703, kabini, mullins, gfx704, bonaire, gfx705, gfx801, carrizo, gfx802, iceland, tonga, gfx803, fiji, polaris10, polaris11, gfx805, tongapro, gfx810, stoney, gfx900, gfx902, gfx904, gfx906, gfx908, gfx909, gfx90a, gfx90c, gfx940, gfx941, gfx1010, gfx1011, gfx1012, gfx1013, gfx1030, gfx1031, gfx1032, gfx1033, gfx1034, gfx1035, gfx1036, gfx1100, gfx1101, gfx1102, gfx1103{{$}} +// AMDGCN-NEXT: note: valid target CPU values are: gfx600, tahiti, gfx601, pitcairn, verde, gfx602, hainan, oland, gfx700, kaveri, gfx701, hawaii, gfx702, gfx703, kabini, mullins, gfx704, bonaire, gfx705, gfx801, carrizo, gfx802, iceland, tonga, gfx803, fiji, polaris10, polaris11, gfx805, tongapro, gfx810, stoney, gfx900, gfx902, gfx904, gfx906, gfx908, gfx909, gfx90a, gfx90c, gfx940, gfx941, gfx942, gfx1010, gfx1011, gfx1012, gfx1013, gfx1030, gfx1031, gfx1032, gfx1033, gfx1034, gfx1035, gfx1036, gfx1100, gfx1101, gfx1102, gfx1103{{$}} // RUN: not %clang_cc1 -triple 
wasm64--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix WEBASM // WEBASM: error: unknown target CPU 'not-a-cpu' Index: llvm/docs/AMDGPUUsage.rst =================================================================== --- llvm/docs/AMDGPUUsage.rst +++ llvm/docs/AMDGPUUsage.rst @@ -392,6 +392,13 @@ work-item Add product IDs names. + ``gfx942`` ``amdgcn`` dGPU - sramecc - Architected *TBA* + - tgsplit flat + - xnack scratch .. TODO:: + - Packed + work-item Add product + IDs names. + **GCN GFX10.1 (RDNA 1)** [AMD-GCN-GFX10-RDNA1]_ ----------------------------------------------------------------------------------------------------------------------- ``gfx1010`` ``amdgcn`` dGPU - cumode - Absolute - *rocm-amdhsa* - Radeon RX 5700 @@ -1332,6 +1339,7 @@ *reserved* 0x049 Reserved. *reserved* 0x04a Reserved. ``EF_AMDGPU_MACH_AMDGCN_GFX941`` 0x04b ``gfx941`` + ``EF_AMDGPU_MACH_AMDGCN_GFX942`` 0x04c ``gfx942`` ==================================== ========== ============================= Sections Index: llvm/include/llvm/BinaryFormat/ELF.h =================================================================== --- llvm/include/llvm/BinaryFormat/ELF.h +++ llvm/include/llvm/BinaryFormat/ELF.h @@ -782,10 +782,11 @@ EF_AMDGPU_MACH_AMDGCN_RESERVED_0X49 = 0x049, EF_AMDGPU_MACH_AMDGCN_RESERVED_0X4A = 0x04a, EF_AMDGPU_MACH_AMDGCN_GFX941 = 0x04b, + EF_AMDGPU_MACH_AMDGCN_GFX942 = 0x04c, // First/last AMDGCN-based processors. EF_AMDGPU_MACH_AMDGCN_FIRST = EF_AMDGPU_MACH_AMDGCN_GFX600, - EF_AMDGPU_MACH_AMDGCN_LAST = EF_AMDGPU_MACH_AMDGCN_GFX941, + EF_AMDGPU_MACH_AMDGCN_LAST = EF_AMDGPU_MACH_AMDGCN_GFX942, // Indicates if the "xnack" target feature is enabled for all code contained // in the object. 
Index: llvm/include/llvm/TargetParser/TargetParser.h =================================================================== --- llvm/include/llvm/TargetParser/TargetParser.h +++ llvm/include/llvm/TargetParser/TargetParser.h @@ -85,6 +85,7 @@ GK_GFX90C = 67, GK_GFX940 = 68, GK_GFX941 = 69, + GK_GFX942 = 70, GK_GFX1010 = 71, GK_GFX1011 = 72, Index: llvm/lib/Object/ELFObjectFile.cpp =================================================================== --- llvm/lib/Object/ELFObjectFile.cpp +++ llvm/lib/Object/ELFObjectFile.cpp @@ -465,6 +465,8 @@ return "gfx940"; case ELF::EF_AMDGPU_MACH_AMDGCN_GFX941: return "gfx941"; + case ELF::EF_AMDGPU_MACH_AMDGCN_GFX942: + return "gfx942"; // AMDGCN GFX10. case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1010: Index: llvm/lib/ObjectYAML/ELFYAML.cpp =================================================================== --- llvm/lib/ObjectYAML/ELFYAML.cpp +++ llvm/lib/ObjectYAML/ELFYAML.cpp @@ -591,6 +591,7 @@ BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX90C, EF_AMDGPU_MACH); BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX940, EF_AMDGPU_MACH); BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX941, EF_AMDGPU_MACH); + BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX942, EF_AMDGPU_MACH); BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX1010, EF_AMDGPU_MACH); BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX1011, EF_AMDGPU_MACH); BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX1012, EF_AMDGPU_MACH); Index: llvm/lib/Target/AMDGPU/AMDGPU.td =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPU.td +++ llvm/lib/Target/AMDGPU/AMDGPU.td @@ -1261,6 +1261,40 @@ FullRate64Ops, FeatureBackOffBarrier]>; +def FeatureISAVersion9_4_2 : FeatureSet< + [FeatureGFX9, + FeatureGFX90AInsts, + FeatureGFX940Insts, + FeatureFmaMixInsts, + FeatureLDSBankCount32, + FeatureDLInsts, + FeatureFmacF64Inst, + FeatureDot1Insts, + FeatureDot2Insts, + FeatureDot3Insts, + FeatureDot4Insts, + FeatureDot5Insts, + FeatureDot6Insts, + FeatureDot7Insts, + FeatureDot10Insts, + FeatureAtomicDsPkAdd16Insts, + FeatureAtomicFlatPkAdd16Insts, 
+ Feature64BitDPP, + FeaturePackedFP32Ops, + FeatureMAIInsts, + FeatureFP8Insts, + FeaturePkFmacF16Inst, + FeatureAtomicFaddRtnInsts, + FeatureAtomicFaddNoRtnInsts, + FeatureAtomicBufferGlobalPkAddF16Insts, + FeatureAtomicGlobalPkAddBF16Inst, + FeatureFlatAtomicFaddF32Inst, + FeatureSupportsSRAMECC, + FeaturePackedTID, + FeatureArchitectedFlatScratch, + FullRate64Ops, + FeatureBackOffBarrier]>; + // TODO: Organize more features into groups. def FeatureGroup { // Bugs present on gfx10.1. Index: llvm/lib/Target/AMDGPU/GCNProcessors.td =================================================================== --- llvm/lib/Target/AMDGPU/GCNProcessors.td +++ llvm/lib/Target/AMDGPU/GCNProcessors.td @@ -200,6 +200,10 @@ FeatureISAVersion9_4_1.Features >; +def : ProcessorModel<"gfx942", SIDPGFX940FullSpeedModel, + FeatureISAVersion9_4_2.Features +>; + //===----------------------------------------------------------------------===// // GCN GFX10. //===----------------------------------------------------------------------===// Index: llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp =================================================================== --- llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp +++ llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp @@ -108,6 +108,7 @@ case ELF::EF_AMDGPU_MACH_AMDGCN_GFX90C: AK = GK_GFX90C; break; case ELF::EF_AMDGPU_MACH_AMDGCN_GFX940: AK = GK_GFX940; break; case ELF::EF_AMDGPU_MACH_AMDGCN_GFX941: AK = GK_GFX941; break; + case ELF::EF_AMDGPU_MACH_AMDGCN_GFX942: AK = GK_GFX942; break; case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1010: AK = GK_GFX1010; break; case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1011: AK = GK_GFX1011; break; case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1012: AK = GK_GFX1012; break; @@ -178,6 +179,7 @@ case GK_GFX90C: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX90C; case GK_GFX940: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX940; case GK_GFX941: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX941; + case GK_GFX942: return 
ELF::EF_AMDGPU_MACH_AMDGCN_GFX942; case GK_GFX1010: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1010; case GK_GFX1011: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1011; case GK_GFX1012: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1012; Index: llvm/lib/TargetParser/TargetParser.cpp =================================================================== --- llvm/lib/TargetParser/TargetParser.cpp +++ llvm/lib/TargetParser/TargetParser.cpp @@ -106,6 +106,7 @@ {{"gfx90c"}, {"gfx90c"}, GK_GFX90C, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK}, {{"gfx940"}, {"gfx940"}, GK_GFX940, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK|FEATURE_SRAMECC}, {{"gfx941"}, {"gfx941"}, GK_GFX941, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK|FEATURE_SRAMECC}, + {{"gfx942"}, {"gfx942"}, GK_GFX942, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK|FEATURE_SRAMECC}, {{"gfx1010"}, {"gfx1010"}, GK_GFX1010, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_XNACK}, {{"gfx1011"}, {"gfx1011"}, GK_GFX1011, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_XNACK}, {{"gfx1012"}, {"gfx1012"}, GK_GFX1012, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_XNACK}, @@ -226,6 +227,7 @@ case GK_GFX90C: return {9, 0, 12}; case GK_GFX940: return {9, 4, 0}; case GK_GFX941: return {9, 4, 1}; + case GK_GFX942: return {9, 4, 2}; case GK_GFX1010: return {10, 1, 0}; case GK_GFX1011: return {10, 1, 1}; case GK_GFX1012: return {10, 1, 2}; @@ -324,6 +326,7 @@ Features["s-memrealtime"] = true; Features["s-memtime-inst"] = true; break; + case GK_GFX942: case GK_GFX941: case GK_GFX940: Features["gfx940-insts"] = true; Index: llvm/test/CodeGen/AMDGPU/directive-amdgcn-target.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/directive-amdgcn-target.ll +++ llvm/test/CodeGen/AMDGPU/directive-amdgcn-target.ll @@ -77,6 +77,9 @@ ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx941 < %s | 
FileCheck --check-prefixes=GFX941 %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx941 -mattr=-xnack < %s | FileCheck --check-prefixes=GFX941-NOXNACK %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx941 -mattr=+xnack < %s | FileCheck --check-prefixes=GFX941-XNACK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 < %s | FileCheck --check-prefixes=GFX942 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 -mattr=-xnack < %s | FileCheck --check-prefixes=GFX942-NOXNACK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 -mattr=+xnack < %s | FileCheck --check-prefixes=GFX942-XNACK %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 < %s | FileCheck --check-prefixes=GFX1010 %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=-xnack < %s | FileCheck --check-prefixes=GFX1010-NOXNACK %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=+xnack < %s | FileCheck --check-prefixes=GFX1010-XNACK %s @@ -158,6 +161,9 @@ ; GFX941: .amdgcn_target "amdgcn-amd-amdhsa--gfx941" ; GFX941-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx941:xnack-" ; GFX941-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx941:xnack+" +; GFX942: .amdgcn_target "amdgcn-amd-amdhsa--gfx942" +; GFX942-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx942:xnack-" +; GFX942-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx942:xnack+" ; GFX1010: .amdgcn_target "amdgcn-amd-amdhsa--gfx1010" ; GFX1010-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx1010:xnack-" ; GFX1010-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx1010:xnack+" Index: llvm/test/CodeGen/AMDGPU/elf-header-flags-mach.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/elf-header-flags-mach.ll +++ llvm/test/CodeGen/AMDGPU/elf-header-flags-mach.ll @@ -56,6 +56,7 @@ ; RUN: llc -filetype=obj -march=amdgcn -mcpu=gfx90c < %s | llvm-readobj --file-header - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX90C %s ; RUN: llc -filetype=obj -march=amdgcn -mcpu=gfx940 < %s | llvm-readobj --file-header 
- | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX940 %s ; RUN: llc -filetype=obj -march=amdgcn -mcpu=gfx941 < %s | llvm-readobj --file-header - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX941 %s +; RUN: llc -filetype=obj -march=amdgcn -mcpu=gfx942 < %s | llvm-readobj --file-header - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX942 %s ; RUN: llc -filetype=obj -march=amdgcn -mcpu=gfx1010 < %s | llvm-readobj --file-header - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX1010 %s ; RUN: llc -filetype=obj -march=amdgcn -mcpu=gfx1011 < %s | llvm-readobj --file-header - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX1011 %s ; RUN: llc -filetype=obj -march=amdgcn -mcpu=gfx1012 < %s | llvm-readobj --file-header - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX1012 %s @@ -124,6 +125,7 @@ ; GFX90C: EF_AMDGPU_MACH_AMDGCN_GFX90C (0x32) ; GFX940: EF_AMDGPU_MACH_AMDGCN_GFX940 (0x40) ; GFX941: EF_AMDGPU_MACH_AMDGCN_GFX941 (0x4B) +; GFX942: EF_AMDGPU_MACH_AMDGCN_GFX942 (0x4C) ; GFX1010: EF_AMDGPU_MACH_AMDGCN_GFX1010 (0x33) ; GFX1011: EF_AMDGPU_MACH_AMDGCN_GFX1011 (0x34) ; GFX1012: EF_AMDGPU_MACH_AMDGCN_GFX1012 (0x35) Index: llvm/test/Object/AMDGPU/elf-header-flags-mach.yaml =================================================================== --- llvm/test/Object/AMDGPU/elf-header-flags-mach.yaml +++ llvm/test/Object/AMDGPU/elf-header-flags-mach.yaml @@ -158,6 +158,10 @@ # RUN: llvm-readobj -S --file-headers %t.o.AMDGCN_GFX941 | FileCheck --check-prefixes=ELF-AMDGCN-ALL,ELF-AMDGCN-GFX941 %s # RUN: obj2yaml %t.o.AMDGCN_GFX941 | FileCheck --check-prefixes=YAML-AMDGCN-ALL,YAML-AMDGCN-GFX941 %s +# RUN: sed -e 's/<BITS>/64/' -e 's/<MACH>/AMDGCN_GFX942/' %s | yaml2obj -o %t.o.AMDGCN_GFX942 +# RUN: llvm-readobj -S --file-headers %t.o.AMDGCN_GFX942 | FileCheck --check-prefixes=ELF-AMDGCN-ALL,ELF-AMDGCN-GFX942 %s +# RUN: obj2yaml %t.o.AMDGCN_GFX942 | FileCheck --check-prefixes=YAML-AMDGCN-ALL,YAML-AMDGCN-GFX942 %s + # RUN: sed -e 's/<BITS>/64/' -e 's/<MACH>/AMDGCN_GFX1010/' %s | yaml2obj -o %t.o.AMDGCN_GFX1010 # RUN: 
llvm-readobj -S --file-headers %t.o.AMDGCN_GFX1010 | FileCheck --check-prefixes=ELF-AMDGCN-ALL,ELF-AMDGCN-GFX1010 %s # RUN: obj2yaml %t.o.AMDGCN_GFX1010 | FileCheck --check-prefixes=YAML-AMDGCN-ALL,YAML-AMDGCN-GFX1010 %s @@ -355,6 +359,9 @@ # ELF-AMDGCN-GFX941: EF_AMDGPU_MACH_AMDGCN_GFX941 (0x4B) # YAML-AMDGCN-GFX941: Flags: [ EF_AMDGPU_MACH_AMDGCN_GFX941 ] +# ELF-AMDGCN-GFX942: EF_AMDGPU_MACH_AMDGCN_GFX942 (0x4C) +# YAML-AMDGCN-GFX942: Flags: [ EF_AMDGPU_MACH_AMDGCN_GFX942 ] + # ELF-AMDGCN-GFX1010: EF_AMDGPU_MACH_AMDGCN_GFX1010 (0x33) # YAML-AMDGCN-GFX1010: Flags: [ EF_AMDGPU_MACH_AMDGCN_GFX1010 ] Index: llvm/test/tools/llvm-objdump/ELF/AMDGPU/subtarget.ll =================================================================== --- llvm/test/tools/llvm-objdump/ELF/AMDGPU/subtarget.ll +++ llvm/test/tools/llvm-objdump/ELF/AMDGPU/subtarget.ll @@ -85,6 +85,11 @@ ; ----------------------------------GFX9--------------------------------------- ; +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 -filetype=obj -O0 -o %t.o %s +; RUN: llvm-objdump -D --arch-name=amdgcn --mcpu=gfx942 %t.o > %t-specify.txt +; RUN: llvm-objdump -D %t.o > %t-detect.txt +; RUN: diff %t-specify.txt %t-detect.txt + ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx941 -filetype=obj -O0 -o %t.o %s ; RUN: llvm-objdump -D --arch-name=amdgcn --mcpu=gfx941 %t.o > %t-specify.txt ; RUN: llvm-objdump -D %t.o > %t-detect.txt Index: llvm/test/tools/llvm-readobj/ELF/amdgpu-elf-headers.test =================================================================== --- llvm/test/tools/llvm-readobj/ELF/amdgpu-elf-headers.test +++ llvm/test/tools/llvm-readobj/ELF/amdgpu-elf-headers.test @@ -214,6 +214,15 @@ # RUN: yaml2obj %s -o %t -DABI_VERSION=2 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX941 # RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=2 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX941 -DFLAG_VALUE=0x4B +# RUN: yaml2obj %s -o %t -DABI_VERSION=0 
-DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX942 +# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=0 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX942 -DFLAG_VALUE=0x4C + +# RUN: yaml2obj %s -o %t -DABI_VERSION=1 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX942 +# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=1 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX942 -DFLAG_VALUE=0x4C + +# RUN: yaml2obj %s -o %t -DABI_VERSION=2 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX942 +# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=2 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX942 -DFLAG_VALUE=0x4C + # RUN: yaml2obj %s -o %t -DABI_VERSION=0 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1010 # RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=0 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1010 -DFLAG_VALUE=0x33 Index: llvm/tools/llvm-readobj/ELFDumper.cpp =================================================================== --- llvm/tools/llvm-readobj/ELFDumper.cpp +++ llvm/tools/llvm-readobj/ELFDumper.cpp @@ -1589,6 +1589,7 @@ LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX90C), LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX940), LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX941), + LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX942), LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1010), LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1011), LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1012), @@ -1650,6 +1651,7 @@ LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX90C), LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX940), LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX941), + LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX942), LLVM_READOBJ_ENUM_ENT(ELF, 
EF_AMDGPU_MACH_AMDGCN_GFX1010), LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1011), LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1012),