Index: clang/include/clang/Basic/Cuda.h =================================================================== --- clang/include/clang/Basic/Cuda.h +++ clang/include/clang/Basic/Cuda.h @@ -83,6 +83,7 @@ GFX1010, GFX1011, GFX1012, + GFX1013, GFX1030, GFX1031, GFX1032, Index: clang/lib/Basic/Cuda.cpp =================================================================== --- clang/lib/Basic/Cuda.cpp +++ clang/lib/Basic/Cuda.cpp @@ -105,6 +105,7 @@ GFX(1010), // gfx1010 GFX(1011), // gfx1011 GFX(1012), // gfx1012 + GFX(1013), // gfx1013 GFX(1030), // gfx1030 GFX(1031), // gfx1031 GFX(1032), // gfx1032 Index: clang/lib/Basic/Targets/AMDGPU.cpp =================================================================== --- clang/lib/Basic/Targets/AMDGPU.cpp +++ clang/lib/Basic/Targets/AMDGPU.cpp @@ -214,6 +214,7 @@ Features["dot6-insts"] = true; Features["dot7-insts"] = true; LLVM_FALLTHROUGH; + case GK_GFX1013: case GK_GFX1010: Features["dl-insts"] = true; Features["ci-insts"] = true; Index: clang/lib/Basic/Targets/NVPTX.cpp =================================================================== --- clang/lib/Basic/Targets/NVPTX.cpp +++ clang/lib/Basic/Targets/NVPTX.cpp @@ -207,6 +207,7 @@ case CudaArch::GFX1010: case CudaArch::GFX1011: case CudaArch::GFX1012: + case CudaArch::GFX1013: case CudaArch::GFX1030: case CudaArch::GFX1031: case CudaArch::GFX1032: Index: clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp =================================================================== --- clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp +++ clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp @@ -4482,6 +4482,7 @@ case CudaArch::GFX1010: case CudaArch::GFX1011: case CudaArch::GFX1012: + case CudaArch::GFX1013: case CudaArch::GFX1030: case CudaArch::GFX1031: case CudaArch::GFX1032: @@ -4553,6 +4554,7 @@ case CudaArch::GFX1010: case CudaArch::GFX1011: case CudaArch::GFX1012: + case CudaArch::GFX1013: case CudaArch::GFX1030: case CudaArch::GFX1031: case CudaArch::GFX1032: Index: clang/test/CodeGenOpenCL/amdgpu-features.cl =================================================================== --- clang/test/CodeGenOpenCL/amdgpu-features.cl +++ clang/test/CodeGenOpenCL/amdgpu-features.cl @@ -28,6 +28,7 @@ // RUN: %clang_cc1 -triple amdgcn -target-cpu gfx1010 -S -emit-llvm -o - %s | FileCheck --check-prefix=GFX1010 %s // RUN: %clang_cc1 -triple amdgcn -target-cpu gfx1011 -S -emit-llvm -o - %s | FileCheck --check-prefix=GFX1011 %s // RUN: %clang_cc1 -triple amdgcn -target-cpu gfx1012 -S -emit-llvm -o - %s | FileCheck --check-prefix=GFX1012 %s +// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx1013 -S -emit-llvm -o - %s | FileCheck --check-prefix=GFX1013 %s // RUN: %clang_cc1 -triple amdgcn -target-cpu gfx1030 -S -emit-llvm -o - %s | FileCheck --check-prefix=GFX1030 %s // RUN: %clang_cc1 -triple amdgcn -target-cpu gfx1031 -S -emit-llvm -o - %s | FileCheck --check-prefix=GFX1031 %s // RUN: %clang_cc1 -triple amdgcn -target-cpu gfx1032 -S -emit-llvm -o - %s | FileCheck --check-prefix=GFX1032 %s @@ -59,6 +60,7 @@ // GFX1010: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dpp,+flat-address-space,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst" // GFX1011: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+flat-address-space,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst" // GFX1012: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+flat-address-space,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst" +// GFX1013: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dpp,+flat-address-space,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst" // GFX1030: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+flat-address-space,+gfx10-3-insts,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst" // GFX1031: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+flat-address-space,+gfx10-3-insts,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst" // GFX1032: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+flat-address-space,+gfx10-3-insts,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst" Index: clang/test/Driver/amdgpu-macros.cl =================================================================== --- clang/test/Driver/amdgpu-macros.cl +++ clang/test/Driver/amdgpu-macros.cl @@ -110,6 +110,7 @@ // RUN: %clang -E -dM -target amdgcn -mcpu=gfx1010 %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=32 -DCPU=gfx1010 // RUN: %clang -E -dM -target amdgcn -mcpu=gfx1011 %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=32 -DCPU=gfx1011 // RUN: %clang -E -dM -target amdgcn -mcpu=gfx1012 %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=32 -DCPU=gfx1012 +// RUN: %clang -E -dM -target amdgcn -mcpu=gfx1013 %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=32 -DCPU=gfx1013 // RUN: %clang -E -dM -target amdgcn -mcpu=gfx1030 %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=32 -DCPU=gfx1030 // RUN: %clang -E -dM -target amdgcn -mcpu=gfx1031 %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=32 -DCPU=gfx1031 // RUN: %clang -E -dM -target amdgcn -mcpu=gfx1032 %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=32 -DCPU=gfx1032 Index: clang/test/Driver/amdgpu-mcpu.cl =================================================================== --- clang/test/Driver/amdgpu-mcpu.cl +++ clang/test/Driver/amdgpu-mcpu.cl @@ -95,6 +95,7 @@ // RUN: %clang -### -target amdgcn -mcpu=gfx1010 %s 2>&1 | FileCheck --check-prefix=GFX1010 %s // RUN: %clang -### -target amdgcn -mcpu=gfx1011 %s 2>&1 | FileCheck --check-prefix=GFX1011 %s // RUN: %clang -### -target amdgcn -mcpu=gfx1012 %s 2>&1 | FileCheck --check-prefix=GFX1012 %s +// RUN: %clang -### -target amdgcn -mcpu=gfx1013 %s 2>&1 | FileCheck --check-prefix=GFX1013 %s // RUN: %clang -### -target amdgcn -mcpu=gfx1030 %s 2>&1 | FileCheck --check-prefix=GFX1030 %s // RUN: %clang -### -target amdgcn -mcpu=gfx1031 %s 2>&1 | FileCheck --check-prefix=GFX1031 %s // RUN: %clang -### -target amdgcn -mcpu=gfx1032 %s 2>&1 | FileCheck --check-prefix=GFX1032 %s @@ -127,6 +128,7 @@ // GFX1010: "-target-cpu" "gfx1010" // GFX1011: "-target-cpu" "gfx1011" // GFX1012: "-target-cpu" "gfx1012" +// GFX1013: "-target-cpu" "gfx1013" // GFX1030: "-target-cpu" "gfx1030" // GFX1031: "-target-cpu" "gfx1031" // GFX1032: "-target-cpu" "gfx1032" Index: clang/test/Misc/target-invalid-cpu-note.c =================================================================== --- clang/test/Misc/target-invalid-cpu-note.c +++ clang/test/Misc/target-invalid-cpu-note.c @@ -86,7 +86,7 @@ // AMDGCN-SAME: gfx703, kabini, mullins, gfx704, bonaire, gfx705, gfx801, carrizo, // AMDGCN-SAME: gfx802, iceland, tonga, gfx803, fiji, polaris10, polaris11, // AMDGCN-SAME: gfx805, tongapro, gfx810, stoney, gfx900, gfx902, gfx904, gfx906, -// AMDGCN-SAME: gfx908, gfx909, gfx90a, gfx90c, gfx1010, gfx1011, gfx1012, gfx1030, gfx1031, +// AMDGCN-SAME: gfx908, gfx909, gfx90a, gfx90c, gfx1010, gfx1011, gfx1012, gfx1013, gfx1030, gfx1031, // AMDGCN-SAME: gfx1032, gfx1033, gfx1034 // RUN: not %clang_cc1 -triple wasm64--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix WEBASM Index: llvm/docs/AMDGPUUsage.rst =================================================================== --- llvm/docs/AMDGPUUsage.rst +++ llvm/docs/AMDGPUUsage.rst @@ -386,6 +386,13 @@ ``gfx1012`` ``amdgcn`` dGPU - cumode - Absolute - *rocm-amdhsa* - Radeon RX 5500 - wavefrontsize64 flat - *pal-amdhsa* - Radeon RX 5500 XT - xnack scratch - *pal-amdpal* + ``gfx1013`` ``amdgcn`` APU - cumode - Absolute - *rocm-amdhsa* *TBA* + - wavefrontsize64 flat - *pal-amdhsa* + - xnack scratch - *pal-amdpal* .. TODO:: + + Add product + names. + **GCN GFX10 (RDNA 2)** [AMD-GCN-GFX10-RDNA2]_ ----------------------------------------------------------------------------------------------------------------------- ``gfx1030`` ``amdgcn`` dGPU - cumode - Absolute - *rocm-amdhsa* - Radeon RX 6800 @@ -1149,6 +1156,7 @@ ``EF_AMDGPU_MACH_AMDGCN_GFX90A`` 0x03f ``gfx90a`` *reserved* 0x040 Reserved. *reserved* 0x041 Reserved. + ``EF_AMDGPU_MACH_AMDGCN_GFX1013`` 0x042 ``gfx1013`` ==================================== ========== ============================= Sections Index: llvm/include/llvm/BinaryFormat/ELF.h =================================================================== --- llvm/include/llvm/BinaryFormat/ELF.h +++ llvm/include/llvm/BinaryFormat/ELF.h @@ -742,10 +742,11 @@ EF_AMDGPU_MACH_AMDGCN_GFX90A = 0x03f, EF_AMDGPU_MACH_AMDGCN_RESERVED_0X40 = 0x040, EF_AMDGPU_MACH_AMDGCN_RESERVED_0X41 = 0x041, + EF_AMDGPU_MACH_AMDGCN_GFX1013 = 0x042, // First/last AMDGCN-based processors. EF_AMDGPU_MACH_AMDGCN_FIRST = EF_AMDGPU_MACH_AMDGCN_GFX600, - EF_AMDGPU_MACH_AMDGCN_LAST = EF_AMDGPU_MACH_AMDGCN_GFX90A, + EF_AMDGPU_MACH_AMDGCN_LAST = EF_AMDGPU_MACH_AMDGCN_GFX1013, // Indicates if the "xnack" target feature is enabled for all code contained // in the object. Index: llvm/include/llvm/Support/TargetParser.h =================================================================== --- llvm/include/llvm/Support/TargetParser.h +++ llvm/include/llvm/Support/TargetParser.h @@ -89,6 +89,7 @@ GK_GFX1010 = 71, GK_GFX1011 = 72, GK_GFX1012 = 73, + GK_GFX1013 = 74, GK_GFX1030 = 75, GK_GFX1031 = 76, GK_GFX1032 = 77, Index: llvm/lib/Object/ELFObjectFile.cpp =================================================================== --- llvm/lib/Object/ELFObjectFile.cpp +++ llvm/lib/Object/ELFObjectFile.cpp @@ -469,6 +469,8 @@ return "gfx1011"; case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1012: return "gfx1012"; + case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1013: + return "gfx1013"; case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1030: return "gfx1030"; case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1031: Index: llvm/lib/ObjectYAML/ELFYAML.cpp =================================================================== --- llvm/lib/ObjectYAML/ELFYAML.cpp +++ llvm/lib/ObjectYAML/ELFYAML.cpp @@ -549,6 +549,7 @@ BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX1010, EF_AMDGPU_MACH); BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX1011, EF_AMDGPU_MACH); BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX1012, EF_AMDGPU_MACH); + BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX1013, EF_AMDGPU_MACH); BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX1030, EF_AMDGPU_MACH); BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX1031, EF_AMDGPU_MACH); BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX1032, EF_AMDGPU_MACH); Index: llvm/lib/Support/TargetParser.cpp =================================================================== --- llvm/lib/Support/TargetParser.cpp +++ llvm/lib/Support/TargetParser.cpp @@ -109,6 +109,7 @@ {{"gfx1010"}, {"gfx1010"}, GK_GFX1010, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_XNACK}, {{"gfx1011"}, {"gfx1011"}, GK_GFX1011, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_XNACK}, {{"gfx1012"}, {"gfx1012"}, GK_GFX1012, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_XNACK}, + {{"gfx1013"}, {"gfx1013"}, GK_GFX1013, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_XNACK}, {{"gfx1030"}, {"gfx1030"}, GK_GFX1030, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32}, {{"gfx1031"}, {"gfx1031"}, GK_GFX1031, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32}, {{"gfx1032"}, {"gfx1032"}, GK_GFX1032, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32}, @@ -220,6 +221,7 @@ case GK_GFX1010: return {10, 1, 0}; case GK_GFX1011: return {10, 1, 1}; case GK_GFX1012: return {10, 1, 2}; + case GK_GFX1013: return {10, 1, 3}; case GK_GFX1030: return {10, 3, 0}; case GK_GFX1031: return {10, 3, 1}; case GK_GFX1032: return {10, 3, 2}; Index: llvm/lib/Target/AMDGPU/AMDGPU.td =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPU.td +++ llvm/lib/Target/AMDGPU/AMDGPU.td @@ -465,6 +465,12 @@ "Support mips != 0, lod != 0, gather4, and get_lod" >; +def FeatureGFX10_AEncoding : SubtargetFeature<"gfx10_a-encoding", + "GFX10_AEncoding", + "true", + "Encoding format GFX10_A" +>; + def FeatureGFX10_BEncoding : SubtargetFeature<"gfx10_b-encoding", "GFX10_BEncoding", "true", @@ -1077,8 +1083,26 @@ FeatureLdsMisalignedBug, FeatureSupportsXNACK])>; +def FeatureISAVersion10_1_3 : FeatureSet< + !listconcat(FeatureGroup.GFX10_1_Bugs, + [FeatureGFX10, + FeatureGFX10_AEncoding, + FeatureLDSBankCount32, + FeatureDLInsts, + FeatureNSAEncoding, + FeatureWavefrontSize32, + FeatureScalarStores, + FeatureScalarAtomics, + FeatureScalarFlatScratchInsts, + FeatureGetWaveIdInst, + FeatureMadMacF32Insts, + FeatureDsSrc2Insts, + FeatureLdsMisalignedBug, + FeatureSupportsXNACK])>; + def FeatureISAVersion10_3_0 : FeatureSet< [FeatureGFX10, + FeatureGFX10_AEncoding, FeatureGFX10_BEncoding, FeatureGFX10_3Insts, FeatureLDSBankCount32, @@ -1291,6 +1315,9 @@ def HasFlatScratchSTMode : Predicate<"Subtarget->hasFlatScratchSTMode()">, AssemblerPredicate<(any_of FeatureGFX10_3Insts)>; +def HasGFX10_AEncoding : Predicate<"Subtarget->hasGFX10_AEncoding()">, + AssemblerPredicate<(all_of FeatureGFX10_AEncoding)>; + def HasGFX10_BEncoding : Predicate<"Subtarget->hasGFX10_BEncoding()">, AssemblerPredicate<(all_of FeatureGFX10_BEncoding)>; Index: llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -4693,6 +4693,16 @@ Register RayInvDir = MI.getOperand(6).getReg(); Register TDescr = MI.getOperand(7).getReg(); + if (!ST.hasGFX10_AEncoding()) { + DiagnosticInfoUnsupported BadIntrin(B.getMF().getFunction(), + "intrinsic not supported on subtarget", + MI.getDebugLoc()); + B.getMF().getFunction().getContext().diagnose(BadIntrin); + B.buildUndef(MI.getOperand(0)); + MI.eraseFromParent(); + return true; + } + bool IsA16 = MRI.getType(RayDir).getElementType().getSizeInBits() == 16; bool Is64 = MRI.getType(NodePtr).getSizeInBits() == 64; unsigned Opcode = IsA16 ? Is64 ? AMDGPU::IMAGE_BVH64_INTERSECT_RAY_a16_nsa Index: llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp +++ llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp @@ -262,6 +262,7 @@ HasGFX10A16(false), HasG16(false), HasNSAEncoding(false), + GFX10_AEncoding(false), GFX10_BEncoding(false), HasDLInsts(false), HasDot1Insts(false), Index: llvm/lib/Target/AMDGPU/GCNProcessors.td =================================================================== --- llvm/lib/Target/AMDGPU/GCNProcessors.td +++ llvm/lib/Target/AMDGPU/GCNProcessors.td @@ -208,6 +208,10 @@ FeatureISAVersion10_1_2.Features >; +def : ProcessorModel<"gfx1013", GFX10SpeedModel, + FeatureISAVersion10_1_3.Features +>; + def : ProcessorModel<"gfx1030", GFX10SpeedModel, FeatureISAVersion10_3_0.Features >; Index: llvm/lib/Target/AMDGPU/GCNSubtarget.h =================================================================== --- llvm/lib/Target/AMDGPU/GCNSubtarget.h +++ llvm/lib/Target/AMDGPU/GCNSubtarget.h @@ -136,6 +136,7 @@ bool HasGFX10A16; bool HasG16; bool HasNSAEncoding; + bool GFX10_AEncoding; bool GFX10_BEncoding; bool HasDLInsts; bool HasDot1Insts; @@ -872,6 +873,10 @@ bool hasNSAEncoding() const { return HasNSAEncoding; } + bool hasGFX10_AEncoding() const { + return GFX10_AEncoding; + } + bool hasGFX10_BEncoding() const { return GFX10_BEncoding; } Index: llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp =================================================================== --- llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp +++ llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp @@ -105,6 +105,7 @@ case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1010: AK = GK_GFX1010; break; case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1011: AK = GK_GFX1011; break; case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1012: AK = GK_GFX1012; break; + case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1013: AK = GK_GFX1013; break; case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1030: AK = GK_GFX1030; break; case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1031: AK = GK_GFX1031; break; case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1032: AK = GK_GFX1032; break; @@ -166,6 +167,7 @@ case GK_GFX1010: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1010; case GK_GFX1011: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1011; case GK_GFX1012: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1012; + case GK_GFX1013: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1013; case GK_GFX1030: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1030; case GK_GFX1031: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1031; case GK_GFX1032: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1032; Index: llvm/lib/Target/AMDGPU/MIMGInstructions.td =================================================================== --- llvm/lib/Target/AMDGPU/MIMGInstructions.td +++ llvm/lib/Target/AMDGPU/MIMGInstructions.td @@ -884,8 +884,8 @@ multiclass MIMG_IntersectRay { def "" : MIMGBaseOpcode; - let SubtargetPredicate = HasGFX10_BEncoding, - AssemblerPredicate = HasGFX10_BEncoding, + let SubtargetPredicate = HasGFX10_AEncoding, + AssemblerPredicate = HasGFX10_AEncoding, AsmMatchConverter = !if(A16, "cvtIntersectRay", ""), dmask = 0xf, unorm = 1, @@ -1034,7 +1034,7 @@ //def IMAGE_RSRC256 : MIMG_NoPattern_RSRC256 <"image_rsrc256", 0x0000007e>; //def IMAGE_SAMPLER : MIMG_NoPattern_ <"image_sampler", 0x0000007f>; -let SubtargetPredicate = HasGFX10_BEncoding in +let SubtargetPredicate = HasGFX10_AEncoding in defm IMAGE_MSAA_LOAD_X : MIMG_NoSampler , "image_msaa_load", 1, 0, 0, 1>; defm IMAGE_BVH_INTERSECT_RAY : MIMG_IntersectRay, "image_bvh_intersect_ray", 11, 0>; Index: llvm/lib/Target/AMDGPU/SIISelLowering.cpp =================================================================== --- llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -7338,6 +7338,11 @@ assert(RayDir.getValueType() == MVT::v4f16 || RayDir.getValueType() == MVT::v4f32); + if (!Subtarget->hasGFX10_AEncoding()) + return DAG.getNode(ISD::MERGE_VALUES, DL, Op->getVTList(), + emitRemovedIntrinsicError(DAG, DL, Op.getValueType()), + Op.getOperand(0)); + bool IsA16 = RayDir.getValueType().getVectorElementType() == MVT::f16; bool Is64 = NodePtr.getValueType() == MVT::i64; unsigned Opcode = IsA16 ? Is64 ? AMDGPU::IMAGE_BVH64_INTERSECT_RAY_a16_nsa Index: llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h =================================================================== --- llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h +++ llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h @@ -737,6 +737,7 @@ bool isGFX10(const MCSubtargetInfo &STI); bool isGFX10Plus(const MCSubtargetInfo &STI); bool isGCN3Encoding(const MCSubtargetInfo &STI); +bool isGFX10_AEncoding(const MCSubtargetInfo &STI); bool isGFX10_BEncoding(const MCSubtargetInfo &STI); bool hasGFX10_3Insts(const MCSubtargetInfo &STI); bool isGFX90A(const MCSubtargetInfo &STI); Index: llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp =================================================================== --- llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp +++ llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp @@ -1447,6 +1447,10 @@ return STI.getFeatureBits()[AMDGPU::FeatureGCN3Encoding]; } +bool isGFX10_AEncoding(const MCSubtargetInfo &STI) { + return STI.getFeatureBits()[AMDGPU::FeatureGFX10_AEncoding]; +} + bool isGFX10_BEncoding(const MCSubtargetInfo &STI) { return STI.getFeatureBits()[AMDGPU::FeatureGFX10_BEncoding]; } Index: llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.intersect_ray.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.intersect_ray.ll +++ llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.intersect_ray.ll @@ -1,5 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1030 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1013 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: not llc -global-isel -march=amdgcn -mcpu=gfx1012 -verify-machineinstrs < %s -o /dev/null 2>&1 | FileCheck -check-prefix=ERR %s ; uint4 llvm.amdgcn.image.bvh.intersect.ray.i32.v4f32(uint node_ptr, float ray_extent, float4 ray_origin, float4 ray_dir, float4 ray_inv_dir, uint4 texture_descr) ; uint4 llvm.amdgcn.image.bvh.intersect.ray.i32.v4f16(uint node_ptr, float ray_extent, float4 ray_origin, half4 ray_dir, half4 ray_inv_dir, uint4 texture_descr) @@ -17,6 +19,7 @@ ; GCN-NEXT: image_bvh_intersect_ray v[0:3], [v0, v1, v2, v3, v4, v6, v7, v8, v10, v11, v12], s[0:3] ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: ; return to shader part epilog +; ERR: in function image_bvh_intersect_ray{{.*}}intrinsic not supported on subtarget %v = call <4 x i32> @llvm.amdgcn.image.bvh.intersect.ray.i32.v4f32(i32 %node_ptr, float %ray_extent, <4 x float> %ray_origin, <4 x float> %ray_dir, <4 x float> %ray_inv_dir, <4 x i32> %tdescr) %r = bitcast <4 x i32> %v to <4 x float> ret <4 x float> %r @@ -38,6 +41,7 @@ ; GCN-NEXT: image_bvh_intersect_ray v[0:3], v[0:7], s[0:3] a16 ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: ; return to shader part epilog +; ERR: in function image_bvh_intersect_ray_a16{{.*}}intrinsic not supported on subtarget %v = call <4 x i32> @llvm.amdgcn.image.bvh.intersect.ray.i32.v4f16(i32 %node_ptr, float %ray_extent, <4 x float> %ray_origin, <4 x half> %ray_dir, <4 x half> %ray_inv_dir, <4 x i32> %tdescr) %r = bitcast <4 x i32> %v to <4 x float> ret <4 x float> %r @@ -49,6 +53,7 @@ ; GCN-NEXT: image_bvh64_intersect_ray v[0:3], [v0, v1, v2, v3, v4, v5, v7, v8, v9, v11, v12, v13], s[0:3] ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: ; return to shader part epilog +; ERR: in function image_bvh64_intersect_ray{{.*}}intrinsic not supported on subtarget %v = call <4 x i32> @llvm.amdgcn.image.bvh.intersect.ray.i64.v4f32(i64 %node_ptr, float %ray_extent, <4 x float> %ray_origin, <4 x float> %ray_dir, <4 x float> %ray_inv_dir, <4 x i32> %tdescr) %r = bitcast <4 x i32> %v to <4 x float> ret <4 x float> %r @@ -70,6 +75,7 @@ ; GCN-NEXT: image_bvh64_intersect_ray v[0:3], v[0:15], s[0:3] a16 ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: ; return to shader part epilog +; ERR: in function image_bvh64_intersect_ray_a16{{.*}}intrinsic not supported on subtarget %v = call <4 x i32> @llvm.amdgcn.image.bvh.intersect.ray.i64.v4f16(i64 %node_ptr, float %ray_extent, <4 x float> %ray_origin, <4 x half> %ray_dir, <4 x half> %ray_inv_dir, <4 x i32> %tdescr) %r = bitcast <4 x i32> %v to <4 x float> ret <4 x float> %r @@ -99,6 +105,7 @@ ; GCN-NEXT: v_mov_b32_e32 v2, v20 ; GCN-NEXT: v_mov_b32_e32 v3, v21 ; GCN-NEXT: ; return to shader part epilog +; ERR: in function image_bvh_intersect_ray_vgpr_descr{{.*}}intrinsic not supported on subtarget %v = call <4 x i32> @llvm.amdgcn.image.bvh.intersect.ray.i32.v4f32(i32 %node_ptr, float %ray_extent, <4 x float> %ray_origin, <4 x float> %ray_dir, <4 x float> %ray_inv_dir, <4 x i32> %tdescr) %r = bitcast <4 x i32> %v to <4 x float> ret <4 x float> %r @@ -138,6 +145,7 @@ ; GCN-NEXT: v_mov_b32_e32 v2, v7 ; GCN-NEXT: v_mov_b32_e32 v3, v8 ; GCN-NEXT: ; return to shader part epilog +; ERR: in function image_bvh_intersect_ray_a16_vgpr_descr{{.*}}intrinsic not supported on subtarget %v = call <4 x i32> @llvm.amdgcn.image.bvh.intersect.ray.i32.v4f16(i32 %node_ptr, float %ray_extent, <4 x float> %ray_origin, <4 x half> %ray_dir, <4 x half> %ray_inv_dir, <4 x i32> %tdescr) %r = bitcast <4 x i32> %v to <4 x float> ret <4 x float> %r @@ -167,6 +175,7 @@ ; GCN-NEXT: v_mov_b32_e32 v2, v21 ; GCN-NEXT: v_mov_b32_e32 v3, v22 ; GCN-NEXT: ; return to shader part epilog +; ERR: in function image_bvh64_intersect_ray_vgpr_descr{{.*}}intrinsic not supported on subtarget %v = call <4 x i32> @llvm.amdgcn.image.bvh.intersect.ray.i64.v4f32(i64 %node_ptr, float %ray_extent, <4 x float> %ray_origin, <4 x float> %ray_dir, <4 x float> %ray_inv_dir, <4 x i32> %tdescr) %r = bitcast <4 x i32> %v to <4 x float> ret <4 x float> %r @@ -206,6 +215,7 @@ ; GCN-NEXT: v_mov_b32_e32 v2, v8 ; GCN-NEXT: v_mov_b32_e32 v3, v9 ; GCN-NEXT: ; return to shader part epilog +; ERR: in function image_bvh64_intersect_ray_a16_vgpr_descr{{.*}}intrinsic not supported on subtarget %v = call <4 x i32> @llvm.amdgcn.image.bvh.intersect.ray.i64.v4f16(i64 %node_ptr, float %ray_extent, <4 x float> %ray_origin, <4 x half> %ray_dir, <4 x half> %ray_inv_dir, <4 x i32> %tdescr) %r = bitcast <4 x i32> %v to <4 x float> ret <4 x float> %r Index: llvm/test/CodeGen/AMDGPU/directive-amdgcn-target.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/directive-amdgcn-target.ll +++ llvm/test/CodeGen/AMDGPU/directive-amdgcn-target.ll @@ -80,6 +80,9 @@ ; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx1012 < %s | FileCheck --check-prefixes=V3-GFX1012-XNACK %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx1012 -mattr=-xnack < %s | FileCheck --check-prefixes=V3-GFX1012-NOXNACK %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx1012 -mattr=+xnack < %s | FileCheck --check-prefixes=V3-GFX1012-XNACK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx1013 < %s | FileCheck --check-prefixes=V3-GFX1013-XNACK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx1013 -mattr=-xnack < %s | FileCheck --check-prefixes=V3-GFX1013-NOXNACK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx1013 -mattr=+xnack < %s | FileCheck --check-prefixes=V3-GFX1013-XNACK %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx1030 < %s | FileCheck --check-prefixes=V3-GFX1030 %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx1031 < %s | FileCheck --check-prefixes=V3-GFX1031 %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx1032 < %s | FileCheck --check-prefixes=V3-GFX1032 %s @@ -168,6 +171,9 @@ ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1012 < %s | FileCheck --check-prefixes=GFX1012 %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1012 -mattr=-xnack < %s | FileCheck --check-prefixes=GFX1012-NOXNACK %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1012 -mattr=+xnack < %s | FileCheck --check-prefixes=GFX1012-XNACK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1013 < %s | FileCheck --check-prefixes=GFX1013 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1013 -mattr=-xnack < %s | FileCheck --check-prefixes=GFX1013-NOXNACK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1013 -mattr=+xnack < %s | FileCheck --check-prefixes=GFX1013-XNACK %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 < %s | FileCheck --check-prefixes=GFX1030 %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1031 < %s | FileCheck --check-prefixes=GFX1031 %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1032 < %s | FileCheck --check-prefixes=GFX1032 %s @@ -214,6 +220,8 @@ ; V3-GFX1011-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx1011+xnack" ; V3-GFX1012-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx1012" ; V3-GFX1012-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx1012+xnack" +; V3-GFX1013-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx1013" +; V3-GFX1013-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx1013+xnack" ; V3-GFX1030: .amdgcn_target "amdgcn-amd-amdhsa--gfx1030" ; V3-GFX1031: .amdgcn_target "amdgcn-amd-amdhsa--gfx1031" ; V3-GFX1032: .amdgcn_target "amdgcn-amd-amdhsa--gfx1032" @@ -280,6 +288,9 @@ ; GFX1012: .amdgcn_target "amdgcn-amd-amdhsa--gfx1012" ; GFX1012-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx1012:xnack-" ; GFX1012-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx1012:xnack+" +; GFX1013: .amdgcn_target "amdgcn-amd-amdhsa--gfx1013" +; GFX1013-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx1013:xnack-" +; GFX1013-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx1013:xnack+" ; GFX1030: .amdgcn_target "amdgcn-amd-amdhsa--gfx1030" ; GFX1031: .amdgcn_target "amdgcn-amd-amdhsa--gfx1031" ; GFX1032: .amdgcn_target "amdgcn-amd-amdhsa--gfx1032" Index: llvm/test/CodeGen/AMDGPU/elf-header-flags-mach.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/elf-header-flags-mach.ll +++ llvm/test/CodeGen/AMDGPU/elf-header-flags-mach.ll @@ -57,6 +57,7 @@ ; RUN: llc -filetype=obj -march=amdgcn -mcpu=gfx1010 < %s | llvm-readobj -file-headers - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX1010 %s ; RUN: llc -filetype=obj -march=amdgcn -mcpu=gfx1011 < %s | llvm-readobj -file-headers - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX1011 %s ; RUN: llc -filetype=obj -march=amdgcn -mcpu=gfx1012 < %s | llvm-readobj -file-headers - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX1012 %s +; RUN: llc -filetype=obj -march=amdgcn -mcpu=gfx1013 < %s | llvm-readobj -file-headers - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX1013 %s ; RUN: llc -filetype=obj -march=amdgcn -mcpu=gfx1030 < %s | llvm-readobj -file-headers - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX1030 %s ; RUN: llc -filetype=obj -march=amdgcn -mcpu=gfx1031 < %s | llvm-readobj -file-headers - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX1031 %s ; RUN: llc -filetype=obj -march=amdgcn -mcpu=gfx1032 < %s | llvm-readobj -file-headers - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX1032 %s @@ -116,6 +117,7 @@ ; GFX1010: EF_AMDGPU_MACH_AMDGCN_GFX1010 (0x33) ; GFX1011: EF_AMDGPU_MACH_AMDGCN_GFX1011 (0x34) ; GFX1012: EF_AMDGPU_MACH_AMDGCN_GFX1012 (0x35) +; GFX1013: EF_AMDGPU_MACH_AMDGCN_GFX1013 (0x42) ; GFX1030: EF_AMDGPU_MACH_AMDGCN_GFX1030 (0x36) ; GFX1031: EF_AMDGPU_MACH_AMDGCN_GFX1031 (0x37) ; GFX1032: EF_AMDGPU_MACH_AMDGCN_GFX1032 (0x38) Index: llvm/test/CodeGen/AMDGPU/llvm.amdgcn.intersect_ray.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/llvm.amdgcn.intersect_ray.ll +++ llvm/test/CodeGen/AMDGPU/llvm.amdgcn.intersect_ray.ll @@ -1,4 +1,6 @@ ; RUN: llc -march=amdgcn -mcpu=gfx1030 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mcpu=gfx1013 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: not llc -march=amdgcn -mcpu=gfx1012 -verify-machineinstrs < %s 2>&1 | FileCheck -check-prefix=ERR %s ; uint4 llvm.amdgcn.image.bvh.intersect.ray.i32.v4f32(uint node_ptr, float ray_extent, float4 ray_origin, float4 ray_dir, float4 ray_inv_dir, uint4 texture_descr) ; uint4 llvm.amdgcn.image.bvh.intersect.ray.i32.v4f16(uint node_ptr, float ray_extent, float4 ray_origin, half4 ray_dir, half4 ray_inv_dir, uint4 texture_descr) @@ -12,6 +14,7 @@ ; GCN-LABEL: {{^}}image_bvh_intersect_ray: ; GCN: image_bvh_intersect_ray v[0:3], v[0:15], s[0:3]{{$}} +; ERR: in function image_bvh_intersect_ray{{.*}}intrinsic not supported on subtarget ; Arguments are flattened to represent the actual VGPR_A layout, so we have no ; extra moves in the generated kernel. define amdgpu_ps <4 x float> @image_bvh_intersect_ray(i32 %node_ptr, float %ray_extent, float %ray_origin_x, float %ray_origin_y, float %ray_origin_z, float %ray_dir_x, float %ray_dir_y, float %ray_dir_z, float %ray_inv_dir_x, float %ray_inv_dir_y, float %ray_inv_dir_z, <4 x i32> inreg %tdescr) { @@ -32,6 +35,7 @@ ; GCN-LABEL: {{^}}image_bvh_intersect_ray_a16: ; GCN: image_bvh_intersect_ray v[0:3], v[{{[0-9:]+}}], s[{{[0-9:]+}}] a16{{$}} +; ERR: in function image_bvh_intersect_ray_a16{{.*}}intrinsic not supported on subtarget define amdgpu_ps <4 x float> @image_bvh_intersect_ray_a16(i32 inreg %node_ptr, float inreg %ray_extent, <4 x float> inreg %ray_origin, <4 x half> inreg %ray_dir, <4 x half> inreg %ray_inv_dir, <4 x i32> inreg %tdescr) { main_body: %v = call <4 x i32> @llvm.amdgcn.image.bvh.intersect.ray.i32.v4f16(i32 %node_ptr, float %ray_extent, <4 x float> %ray_origin, <4 x half> %ray_dir, <4 x half> %ray_inv_dir, <4 x i32> %tdescr) @@ -41,6 +45,7 @@ ; GCN-LABEL: {{^}}image_bvh64_intersect_ray: ; GCN: image_bvh64_intersect_ray v[0:3], v[0:15], s[0:3]{{$}} +; ERR: in function image_bvh64_intersect_ray{{.*}}intrinsic not supported on subtarget ; Arguments are flattened to represent the actual VGPR_A layout, so we have no ; extra moves in the generated kernel. define amdgpu_ps <4 x float> @image_bvh64_intersect_ray(<2 x i32> %node_ptr_vec, float %ray_extent, float %ray_origin_x, float %ray_origin_y, float %ray_origin_z, float %ray_dir_x, float %ray_dir_y, float %ray_dir_z, float %ray_inv_dir_x, float %ray_inv_dir_y, float %ray_inv_dir_z, <4 x i32> inreg %tdescr) { @@ -62,6 +67,7 @@ ; GCN-LABEL: {{^}}image_bvh64_intersect_ray_a16: ; GCN: image_bvh64_intersect_ray v[0:3], v[{{[0-9:]+}}], s[{{[0-9:]+}}] a16{{$}} +; ERR: in function{{.*}}intrinsic not supported on subtarget define amdgpu_ps <4 x float> @image_bvh64_intersect_ray_a16(i64 inreg %node_ptr, float inreg %ray_extent, <4 x float> inreg %ray_origin, <4 x half> inreg %ray_dir, <4 x half> inreg %ray_inv_dir, <4 x i32> inreg %tdescr) { main_body: %v = call <4 x i32> @llvm.amdgcn.image.bvh.intersect.ray.i64.v4f16(i64 %node_ptr, float %ray_extent, <4 x float> %ray_origin, <4 x half> %ray_dir, <4 x half> %ray_inv_dir, <4 x i32> %tdescr) @@ -73,6 +79,7 @@ ; GCN-LABEL: {{^}}image_bvh_intersect_ray_nsa_reassign: ; GCN: image_bvh_intersect_ray v[{{[0-9:]+}}], v[{{[0-9:]+}}], s[{{[0-9:]+}}]{{$}} +; ERR: in function image_bvh_intersect_ray_nsa_reassign{{.*}}intrinsic not supported on subtarget define amdgpu_kernel void @image_bvh_intersect_ray_nsa_reassign(i32* %p_node_ptr, float* %p_ray, <4 x i32> inreg %tdescr) { main_body: %lid = tail call i32 @llvm.amdgcn.workitem.id.x() @@ -96,6 +103,7 @@ ; GCN-LABEL: {{^}}image_bvh_intersect_ray_a16_nsa_reassign: ; GCN: image_bvh_intersect_ray v[{{[0-9:]+}}], v[{{[0-9:]+}}], s[{{[0-9:]+}}] a16{{$}} +; ERR: in function image_bvh_intersect_ray_a16_nsa_reassign{{.*}}intrinsic not supported on subtarget define amdgpu_kernel void @image_bvh_intersect_ray_a16_nsa_reassign(i32* %p_node_ptr, float* %p_ray, <4 x i32> inreg %tdescr) { main_body: %lid = tail call i32 @llvm.amdgcn.workitem.id.x() @@ -119,6 +127,7 @@ ; GCN-LABEL: {{^}}image_bvh64_intersect_ray_nsa_reassign: ; GCN: image_bvh64_intersect_ray v[{{[0-9:]+}}], v[{{[0-9:]+}}], s[{{[0-9:]+}}]{{$}} +; ERR: in function image_bvh64_intersect_ray_nsa_reassign{{.*}}intrinsic not supported on subtarget define amdgpu_kernel void @image_bvh64_intersect_ray_nsa_reassign(float* %p_ray, <4 x i32> inreg %tdescr) { main_body: %lid = tail call i32 @llvm.amdgcn.workitem.id.x() @@ -140,6 +149,7 @@ ; GCN-LABEL: {{^}}image_bvh64_intersect_ray_a16_nsa_reassign: ; GCN: image_bvh64_intersect_ray v[{{[0-9:]+}}], v[{{[0-9:]+}}], s[{{[0-9:]+}}] a16{{$}} +; ERR: in function image_bvh64_intersect_ray_a16_nsa_reassign{{.*}}intrinsic not supported on subtarget define amdgpu_kernel void @image_bvh64_intersect_ray_a16_nsa_reassign(float* %p_ray, <4 x i32> inreg %tdescr) { main_body: %lid = tail call i32 @llvm.amdgcn.workitem.id.x() Index: llvm/test/MC/AMDGPU/dl-insts-err.s =================================================================== --- llvm/test/MC/AMDGPU/dl-insts-err.s +++ llvm/test/MC/AMDGPU/dl-insts-err.s @@ -2,6 +2,7 @@ // RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 %s 2>&1 | FileCheck %s // RUN: not llvm-mc -arch=amdgcn -mcpu=gfx906 %s 2>&1 | FileCheck %s --check-prefix=GFX906-GFX908 // RUN: not llvm-mc -arch=amdgcn -mcpu=gfx908 %s 2>&1 | FileCheck %s --check-prefix=GFX906-GFX908 +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1013 %s 2>&1 | FileCheck %s --check-prefix=GFX1013 // // Test unsupported GPUs. @@ -12,18 +13,25 @@ // CHECK: error: instruction not supported on this GPU v_xnor_b32 v0, v1, v2 // CHECK: error: instruction not supported on this GPU +// GFX1013: error: instruction not supported on this GPU v_dot2_f32_f16 v0, v1, v2, v3 // CHECK: error: instruction not supported on this GPU +// GFX1013: error: instruction not supported on this GPU v_dot2_i32_i16 v0, v1, v2, v3 // CHECK: error: instruction not supported on this GPU +// GFX1013: error: instruction not supported on this GPU v_dot2_u32_u16 v0, v1, v2, v3 // CHECK: error: instruction not supported on this GPU +// GFX1013: error: instruction not supported on this GPU v_dot4_i32_i8 v0, v1, v2, v3 // CHECK: error: instruction not supported on this GPU +// GFX1013: error: instruction not supported on this GPU v_dot4_u32_u8 v0, v1, v2, v3 // CHECK: error: instruction not supported on this GPU +// GFX1013: error: instruction not supported on this GPU v_dot8_i32_i4 v0, v1, v2, v3 // CHECK: error: instruction not supported on this GPU +// GFX1013: error: instruction not supported on this GPU v_dot8_u32_u4 v0, v1, v2, v3 // Index: llvm/test/MC/AMDGPU/gfx10_unsupported.s =================================================================== --- llvm/test/MC/AMDGPU/gfx10_unsupported.s +++ llvm/test/MC/AMDGPU/gfx10_unsupported.s @@ -1,5 +1,6 @@ // RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize32,-wavefrontsize64 %s 2>&1 | FileCheck --implicit-check-not=error: %s // RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -mattr=-wavefrontsize32,+wavefrontsize64 %s 2>&1 | FileCheck --implicit-check-not=error: %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1013 -mattr=-wavefrontsize32,+wavefrontsize64 %s 2>&1 | FileCheck --implicit-check-not=error: %s //===----------------------------------------------------------------------===// // Unsupported instructions. Index: llvm/test/Object/AMDGPU/elf-header-flags-mach.yaml =================================================================== --- llvm/test/Object/AMDGPU/elf-header-flags-mach.yaml +++ llvm/test/Object/AMDGPU/elf-header-flags-mach.yaml @@ -162,6 +162,10 @@ # RUN: llvm-readobj -S --file-headers %t.o.AMDGCN_GFX1012 | FileCheck --check-prefixes=ELF-AMDGCN-ALL,ELF-AMDGCN-GFX1012 %s # RUN: obj2yaml %t.o.AMDGCN_GFX1012 | FileCheck --check-prefixes=YAML-AMDGCN-ALL,YAML-AMDGCN-GFX1012 %s +# RUN: sed -e 's//64/' -e 's//AMDGCN_GFX1013/' %s | yaml2obj -o %t.o.AMDGCN_GFX1013 +# RUN: llvm-readobj -S --file-headers %t.o.AMDGCN_GFX1013 | FileCheck --check-prefixes=ELF-AMDGCN-ALL,ELF-AMDGCN-GFX1013 %s +# RUN: obj2yaml %t.o.AMDGCN_GFX1013 | FileCheck --check-prefixes=YAML-AMDGCN-ALL,YAML-AMDGCN-GFX1013 %s + # RUN: sed -e 's//64/' -e 's//AMDGCN_GFX1030/' %s | yaml2obj -o %t.o.AMDGCN_GFX1030 # RUN: llvm-readobj -S --file-headers %t.o.AMDGCN_GFX1030 | FileCheck --check-prefixes=ELF-AMDGCN-ALL,ELF-AMDGCN-GFX1030 %s # RUN: obj2yaml %t.o.AMDGCN_GFX1030 | FileCheck --check-prefixes=YAML-AMDGCN-ALL,YAML-AMDGCN-GFX1030 %s @@ -322,6 +326,9 @@ # ELF-AMDGCN-GFX1012: EF_AMDGPU_MACH_AMDGCN_GFX1012 (0x35) # YAML-AMDGCN-GFX1012: Flags: [ EF_AMDGPU_MACH_AMDGCN_GFX1012 ] +# ELF-AMDGCN-GFX1013: EF_AMDGPU_MACH_AMDGCN_GFX1013 (0x42) +# YAML-AMDGCN-GFX1013: Flags: [ EF_AMDGPU_MACH_AMDGCN_GFX1013 ] + # ELF-AMDGCN-GFX1030: EF_AMDGPU_MACH_AMDGCN_GFX1030 (0x36) # YAML-AMDGCN-GFX1030: Flags: [ EF_AMDGPU_MACH_AMDGCN_GFX1030 ] Index: llvm/test/tools/llvm-objdump/ELF/AMDGPU/subtarget.ll =================================================================== --- llvm/test/tools/llvm-objdump/ELF/AMDGPU/subtarget.ll +++ llvm/test/tools/llvm-objdump/ELF/AMDGPU/subtarget.ll @@ -31,6 +31,11 @@ ; RUN: llvm-objdump -D %t.o > %t-detect.txt ; RUN: diff %t-specify.txt %t-detect.txt +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1013 -filetype=obj -O0 -o %t.o %s +; RUN: llvm-objdump -D --arch-name=amdgcn --mcpu=gfx1013 %t.o > %t-specify.txt +; RUN: llvm-objdump -D %t.o > %t-detect.txt +; RUN: diff %t-specify.txt %t-detect.txt + ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1012 -filetype=obj -O0 -o %t.o %s ; RUN: llvm-objdump -D --arch-name=amdgcn --mcpu=gfx1012 %t.o > %t-specify.txt ; RUN: llvm-objdump -D %t.o > %t-detect.txt Index: llvm/test/tools/llvm-readobj/ELF/amdgpu-elf-headers.test =================================================================== --- llvm/test/tools/llvm-readobj/ELF/amdgpu-elf-headers.test +++ llvm/test/tools/llvm-readobj/ELF/amdgpu-elf-headers.test @@ -223,6 +223,15 @@ # RUN: yaml2obj %s -o %t -DABI_VERSION=2 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1012 # RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=2 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1012 -DFLAG_VALUE=0x35 +# RUN: yaml2obj %s -o %t -DABI_VERSION=0 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1013 +# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=0 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1013 -DFLAG_VALUE=0x42 + +# RUN: yaml2obj %s -o %t -DABI_VERSION=1 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1013 +# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=1 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1013 -DFLAG_VALUE=0x42 + +# RUN: yaml2obj %s -o %t -DABI_VERSION=2 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1013 +# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=2 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1013 -DFLAG_VALUE=0x42 + # RUN: yaml2obj %s -o %t -DABI_VERSION=0 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1030 # RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=0 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1030 -DFLAG_VALUE=0x36 Index: llvm/tools/llvm-readobj/ELFDumper.cpp =================================================================== --- llvm/tools/llvm-readobj/ELFDumper.cpp +++ llvm/tools/llvm-readobj/ELFDumper.cpp @@ -1482,6 +1482,7 @@ LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1010), LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1011), LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1012), + LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1013), LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1030), LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1031), LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1032), @@ -1534,6 +1535,7 @@ LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1010), LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1011), LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1012), + LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1013), LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1030), LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1031), LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1032), Index: openmp/libomptarget/plugins/amdgpu/impl/get_elf_mach_gfx_name.cpp =================================================================== --- openmp/libomptarget/plugins/amdgpu/impl/get_elf_mach_gfx_name.cpp +++ openmp/libomptarget/plugins/amdgpu/impl/get_elf_mach_gfx_name.cpp @@ -39,6 +39,8 @@ return "gfx1011"; case EF_AMDGPU_MACH_AMDGCN_GFX1012: return "gfx1012"; + case EF_AMDGPU_MACH_AMDGCN_GFX1013: + return "gfx1013"; case EF_AMDGPU_MACH_AMDGCN_GFX1030: return "gfx1030"; case EF_AMDGPU_MACH_AMDGCN_GFX1031: