Index: docs/AMDGPUUsage.rst =================================================================== --- docs/AMDGPUUsage.rst +++ docs/AMDGPUUsage.rst @@ -207,6 +207,8 @@ names. ``gfx906`` ``amdgcn`` dGPU - xnack *TBA* [off] + sram-ecc + [on] .. TODO Add product names. @@ -241,24 +243,26 @@ .. table:: AMDGPU Target Features :name: amdgpu-target-feature-table - ============== ================================================== - Target Feature Description - ============== ================================================== - -m[no-]xnack Enable/disable generating code that has - memory clauses that are compatible with - having XNACK replay enabled. - - This is used for demand paging and page - migration. If XNACK replay is enabled in - the device, then if a page fault occurs - the code may execute incorrectly if the - ``xnack`` feature is not enabled. Executing - code that has the feature enabled on a - device that does not have XNACK replay - enabled will execute correctly, but may - be less performant than code with the - feature disabled. - ============== ================================================== + =============== ================================================== + Target Feature Description + =============== ================================================== + -m[no-]xnack Enable/disable generating code that has + memory clauses that are compatible with + having XNACK replay enabled. + + This is used for demand paging and page + migration. If XNACK replay is enabled in + the device, then if a page fault occurs + the code may execute incorrectly if the + ``xnack`` feature is not enabled. Executing + code that has the feature enabled on a + device that does not have XNACK replay + enabled will execute correctly, but may + be less performant than code with the + feature disabled. + -m[no-]sram-ecc Enable/disable generating code that assumes SRAM + ECC is enabled/disabled. + =============== ================================================== .. 
_amdgpu-address-spaces: @@ -544,6 +548,17 @@ be 0. See :ref:`amdgpu-target-features`. + ``EF_AMDGPU_SRAM_ECC`` 0x00000200 Indicates if the ``sram-ecc`` + target feature is + enabled for all code + contained in the code object. + If the processor + does not support the + ``sram-ecc`` target + feature then must + be 0. + See + :ref:`amdgpu-target-features`. ================================= ========== ============================= .. table:: AMDGPU ``EF_AMDGPU_MACH`` Values Index: include/llvm/BinaryFormat/ELF.h =================================================================== --- include/llvm/BinaryFormat/ELF.h +++ include/llvm/BinaryFormat/ELF.h @@ -710,9 +710,12 @@ EF_AMDGPU_MACH_AMDGCN_FIRST = EF_AMDGPU_MACH_AMDGCN_GFX600, EF_AMDGPU_MACH_AMDGCN_LAST = EF_AMDGPU_MACH_AMDGCN_GFX906, - // Indicates if the xnack target feature is enabled for all code contained in - // the object. + // Indicates if the "xnack" target feature is enabled for all code contained + // in the object. EF_AMDGPU_XNACK = 0x100, + // Indicates if the "sram-ecc" target feature is enabled for all code + // contained in the object. 
+ EF_AMDGPU_SRAM_ECC = 0x200, }; // ELF Relocation types for AMDGPU Index: lib/ObjectYAML/ELFYAML.cpp =================================================================== --- lib/ObjectYAML/ELFYAML.cpp +++ lib/ObjectYAML/ELFYAML.cpp @@ -403,6 +403,7 @@ BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX904, EF_AMDGPU_MACH); BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX906, EF_AMDGPU_MACH); BCase(EF_AMDGPU_XNACK); + BCase(EF_AMDGPU_SRAM_ECC); break; case ELF::EM_X86_64: break; Index: lib/Target/AMDGPU/AMDGPU.td =================================================================== --- lib/Target/AMDGPU/AMDGPU.td +++ lib/Target/AMDGPU/AMDGPU.td @@ -266,13 +266,10 @@ "Has deep learning instructions" >; -def FeatureD16PreservesUnusedBits : SubtargetFeature< - "d16-preserves-unused-bits", - "D16PreservesUnusedBits", - "true", - "If present, then instructions defined by HasD16LoadStore predicate preserve " - "unused bits. Otherwise instructions defined by HasD16LoadStore predicate " - "zero unused bits." +def FeatureSRAMECC : SubtargetFeature<"sram-ecc", + "EnableSRAMECC", + "true", + "Enable SRAM ECC" >; //===------------------------------------------------------------===// @@ -530,28 +527,26 @@ def FeatureISAVersion9_0_0 : SubtargetFeatureISAVersion <9,0,0, [FeatureGFX9, FeatureMadMixInsts, - FeatureLDSBankCount32, - FeatureD16PreservesUnusedBits]>; + FeatureLDSBankCount32]>; def FeatureISAVersion9_0_2 : SubtargetFeatureISAVersion <9,0,2, [FeatureGFX9, FeatureMadMixInsts, FeatureLDSBankCount32, - FeatureXNACK, - FeatureD16PreservesUnusedBits]>; + FeatureXNACK]>; def FeatureISAVersion9_0_4 : SubtargetFeatureISAVersion <9,0,4, [FeatureGFX9, FeatureLDSBankCount32, - FeatureFmaMixInsts, - FeatureD16PreservesUnusedBits]>; + FeatureFmaMixInsts]>; def FeatureISAVersion9_0_6 : SubtargetFeatureISAVersion <9,0,6, [FeatureGFX9, HalfRate64Ops, FeatureFmaMixInsts, FeatureLDSBankCount32, - FeatureDLInsts]>; + FeatureDLInsts, + FeatureSRAMECC]>; 
//===----------------------------------------------------------------------===// // Debugger related subtarget features. @@ -683,8 +678,9 @@ def HasPackedD16VMem : Predicate<"!Subtarget->hasUnpackedD16VMem()">, AssemblerPredicate<"!FeatureUnpackedD16VMem">; -def D16PreservesUnusedBits : Predicate<"Subtarget->d16PreservesUnusedBits()">, - AssemblerPredicate<"FeatureD16PreservesUnusedBits">; +def D16PreservesUnusedBits : + Predicate<"Subtarget->hasD16LoadStore() && !Subtarget->isSRAMECCEnabled()">, + AssemblerPredicate<"FeatureGFX9Insts,!FeatureSRAMECC">; def LDSRequiresM0Init : Predicate<"Subtarget->ldsRequiresM0Init()">; def NotLDSRequiresM0Init : Predicate<"!Subtarget->ldsRequiresM0Init()">; Index: lib/Target/AMDGPU/AMDGPUSubtarget.h =================================================================== --- lib/Target/AMDGPU/AMDGPUSubtarget.h +++ lib/Target/AMDGPU/AMDGPUSubtarget.h @@ -353,7 +353,7 @@ bool HasDPP; bool HasR128A16; bool HasDLInsts; - bool D16PreservesUnusedBits; + bool EnableSRAMECC; bool FlatAddressSpace; bool FlatInstOffsets; bool FlatGlobalInsts; @@ -675,8 +675,8 @@ return HasDLInsts; } - bool d16PreservesUnusedBits() const { - return D16PreservesUnusedBits; + bool isSRAMECCEnabled() const { + return EnableSRAMECC; } // Scratch is allocated in 256 dword per wave blocks for the entire Index: lib/Target/AMDGPU/AMDGPUSubtarget.cpp =================================================================== --- lib/Target/AMDGPU/AMDGPUSubtarget.cpp +++ lib/Target/AMDGPU/AMDGPUSubtarget.cpp @@ -199,7 +199,7 @@ HasDPP(false), HasR128A16(false), HasDLInsts(false), - D16PreservesUnusedBits(false), + EnableSRAMECC(false), FlatAddressSpace(false), FlatInstOffsets(false), FlatGlobalInsts(false), Index: lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp =================================================================== --- lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp +++ lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp @@ -345,6 +345,10 @@ if 
(AMDGPU::hasXNACK(STI)) EFlags |= ELF::EF_AMDGPU_XNACK; + EFlags &= ~ELF::EF_AMDGPU_SRAM_ECC; + if (AMDGPU::hasSRAMECC(STI)) + EFlags |= ELF::EF_AMDGPU_SRAM_ECC; + MCA.setELFHeaderEFlags(EFlags); } Index: lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h =================================================================== --- lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h +++ lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h @@ -342,6 +342,7 @@ } bool hasXNACK(const MCSubtargetInfo &STI); +bool hasSRAMECC(const MCSubtargetInfo &STI); bool hasMIMG_R128(const MCSubtargetInfo &STI); bool hasPackedD16(const MCSubtargetInfo &STI); Index: lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp =================================================================== --- lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp +++ lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp @@ -152,6 +152,8 @@ if (hasXNACK(*STI)) Stream << "+xnack"; + if (hasSRAMECC(*STI)) + Stream << "+sram-ecc"; Stream.flush(); } @@ -593,6 +595,10 @@ return STI.getFeatureBits()[AMDGPU::FeatureXNACK]; } +bool hasSRAMECC(const MCSubtargetInfo &STI) { + return STI.getFeatureBits()[AMDGPU::FeatureSRAMECC]; +} + bool hasMIMG_R128(const MCSubtargetInfo &STI) { return STI.getFeatureBits()[AMDGPU::FeatureMIMG_R128]; } Index: test/CodeGen/AMDGPU/directive-amdgcn-target.ll =================================================================== --- test/CodeGen/AMDGPU/directive-amdgcn-target.ll +++ test/CodeGen/AMDGPU/directive-amdgcn-target.ll @@ -34,6 +34,12 @@ ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=+code-object-v3,+xnack < %s | FileCheck --check-prefixes=XNACK-GFX900 %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx902 -mattr=+code-object-v3,-xnack < %s | FileCheck --check-prefixes=NO-XNACK-GFX902 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx904 -mattr=+code-object-v3,+sram-ecc < %s | FileCheck --check-prefixes=SRAM-ECC-GFX904 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -mattr=+code-object-v3,-sram-ecc < %s | FileCheck 
--check-prefixes=NO-SRAM-ECC-GFX906 %s + +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx904 -mattr=+code-object-v3,+sram-ecc,+xnack < %s | FileCheck --check-prefixes=SRAM-ECC-XNACK-GFX904 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -mattr=+code-object-v3,+xnack < %s | FileCheck --check-prefixes=XNACK-GFX906 %s + ; GFX600: .amdgcn_target "amdgcn-amd-amdhsa--gfx600" ; GFX601: .amdgcn_target "amdgcn-amd-amdhsa--gfx601" ; GFX700: .amdgcn_target "amdgcn-amd-amdhsa--gfx700" @@ -48,10 +54,16 @@ ; GFX900: .amdgcn_target "amdgcn-amd-amdhsa--gfx900" ; GFX902: .amdgcn_target "amdgcn-amd-amdhsa--gfx902+xnack" ; GFX904: .amdgcn_target "amdgcn-amd-amdhsa--gfx904" -; GFX906: .amdgcn_target "amdgcn-amd-amdhsa--gfx906" +; GFX906: .amdgcn_target "amdgcn-amd-amdhsa--gfx906+sram-ecc" ; XNACK-GFX900: .amdgcn_target "amdgcn-amd-amdhsa--gfx900+xnack" -; NO-XNACK-GFX902: .amdgcn_target "amdgcn-amd-amdhsa--gfx902 +; NO-XNACK-GFX902: .amdgcn_target "amdgcn-amd-amdhsa--gfx902" + +; SRAM-ECC-GFX904: .amdgcn_target "amdgcn-amd-amdhsa--gfx904+sram-ecc" +; NO-SRAM-ECC-GFX906: .amdgcn_target "amdgcn-amd-amdhsa--gfx906" + +; SRAM-ECC-XNACK-GFX904: .amdgcn_target "amdgcn-amd-amdhsa--gfx904+xnack+sram-ecc" +; XNACK-GFX906: .amdgcn_target "amdgcn-amd-amdhsa--gfx906+xnack+sram-ecc" define amdgpu_kernel void @directive_amdgcn_target() { ret void Index: test/CodeGen/AMDGPU/elf-header-flags-mach.ll =================================================================== --- test/CodeGen/AMDGPU/elf-header-flags-mach.ll +++ test/CodeGen/AMDGPU/elf-header-flags-mach.ll @@ -85,6 +85,7 @@ ; GFX902-NEXT: EF_AMDGPU_XNACK (0x100) ; GFX904: EF_AMDGPU_MACH_AMDGCN_GFX904 (0x2E) ; GFX906: EF_AMDGPU_MACH_AMDGCN_GFX906 (0x2F) +; GFX906-NEXT: EF_AMDGPU_SRAM_ECC (0x200) ; ALL: ] define amdgpu_kernel void @elf_header() { Index: test/CodeGen/AMDGPU/elf-header-flags-sram-ecc.ll =================================================================== --- test/CodeGen/AMDGPU/elf-header-flags-sram-ecc.ll +++ 
test/CodeGen/AMDGPU/elf-header-flags-sram-ecc.ll @@ -0,0 +1,38 @@ +; RUN: llc -filetype=obj -march=amdgcn -mcpu=gfx902 < %s | llvm-readobj -file-headers - | FileCheck --check-prefixes=NO-SRAM-ECC-GFX902 %s +; RUN: llc -filetype=obj -march=amdgcn -mcpu=gfx902 -mattr=-sram-ecc < %s | llvm-readobj -file-headers - | FileCheck --check-prefixes=NO-SRAM-ECC-GFX902 %s +; RUN: llc -filetype=obj -march=amdgcn -mcpu=gfx902 -mattr=+sram-ecc < %s | llvm-readobj -file-headers - | FileCheck --check-prefixes=SRAM-ECC-GFX902 %s + +; RUN: llc -filetype=obj -march=amdgcn -mcpu=gfx906 < %s | llvm-readobj -file-headers - | FileCheck --check-prefixes=SRAM-ECC-GFX906 %s +; RUN: llc -filetype=obj -march=amdgcn -mcpu=gfx906 -mattr=-sram-ecc < %s | llvm-readobj -file-headers - | FileCheck --check-prefixes=NO-SRAM-ECC-GFX906 %s +; RUN: llc -filetype=obj -march=amdgcn -mcpu=gfx906 -mattr=+sram-ecc < %s | llvm-readobj -file-headers - | FileCheck --check-prefixes=SRAM-ECC-GFX906 %s +; RUN: llc -filetype=obj -march=amdgcn -mcpu=gfx906 -mattr=+sram-ecc,+xnack < %s | llvm-readobj -file-headers - | FileCheck --check-prefixes=SRAM-ECC-XNACK-GFX906 %s + +; NO-SRAM-ECC-GFX902: Flags [ +; NO-SRAM-ECC-GFX902-NEXT: EF_AMDGPU_MACH_AMDGCN_GFX902 (0x2D) +; NO-SRAM-ECC-GFX902-NEXT: EF_AMDGPU_XNACK (0x100) +; NO-SRAM-ECC-GFX902-NEXT: ] + +; SRAM-ECC-GFX902: Flags [ +; SRAM-ECC-GFX902-NEXT: EF_AMDGPU_MACH_AMDGCN_GFX902 (0x2D) +; SRAM-ECC-GFX902-NEXT: EF_AMDGPU_SRAM_ECC (0x200) +; SRAM-ECC-GFX902-NEXT: EF_AMDGPU_XNACK (0x100) +; SRAM-ECC-GFX902-NEXT: ] + +; NO-SRAM-ECC-GFX906: Flags [ +; NO-SRAM-ECC-GFX906-NEXT: EF_AMDGPU_MACH_AMDGCN_GFX906 (0x2F) +; NO-SRAM-ECC-GFX906-NEXT: ] + +; SRAM-ECC-GFX906: Flags [ +; SRAM-ECC-GFX906-NEXT: EF_AMDGPU_MACH_AMDGCN_GFX906 (0x2F) +; SRAM-ECC-GFX906-NEXT: EF_AMDGPU_SRAM_ECC (0x200) +; SRAM-ECC-GFX906-NEXT: ] + +; SRAM-ECC-XNACK-GFX906: Flags [ +; SRAM-ECC-XNACK-GFX906-NEXT: EF_AMDGPU_MACH_AMDGCN_GFX906 (0x2F) +; SRAM-ECC-XNACK-GFX906-NEXT: EF_AMDGPU_SRAM_ECC (0x200) +; 
SRAM-ECC-XNACK-GFX906-NEXT: EF_AMDGPU_XNACK (0x100) +; SRAM-ECC-XNACK-GFX906-NEXT: ] + +define amdgpu_kernel void @elf_header() { + ret void +} Index: test/Object/AMDGPU/elf-header-flags-sram-ecc.yaml =================================================================== --- test/Object/AMDGPU/elf-header-flags-sram-ecc.yaml +++ test/Object/AMDGPU/elf-header-flags-sram-ecc.yaml @@ -0,0 +1,61 @@ +# RUN: yaml2obj -docnum=1 %s > %t.o.1 +# RUN: llvm-readobj -s -file-headers %t.o.1 | FileCheck --check-prefixes=ELF-ALL,ELF-SRAM-ECC-NONE %s +# RUN: obj2yaml %t.o.1 | FileCheck --check-prefixes=YAML-SRAM-ECC-NONE %s +# RUN: yaml2obj -docnum=2 %s > %t.o.2 +# RUN: llvm-readobj -s -file-headers %t.o.2 | FileCheck --check-prefixes=ELF-ALL,ELF-SRAM-ECC-GFX900 %s +# RUN: obj2yaml %t.o.2 | FileCheck --check-prefixes=YAML-SRAM-ECC-GFX900 %s +# RUN: yaml2obj -docnum=3 %s > %t.o.3 +# RUN: llvm-readobj -s -file-headers %t.o.3 | FileCheck --check-prefixes=ELF-ALL,ELF-SRAM-ECC-XNACK-GFX900 %s +# RUN: obj2yaml %t.o.3 | FileCheck --check-prefixes=YAML-SRAM-ECC-XNACK-GFX900 %s + +# ELF-SRAM-ECC-NONE: Flags [ +# ELF-SRAM-ECC-NONE-NEXT: EF_AMDGPU_SRAM_ECC (0x200) +# ELF-SRAM-ECC-NONE-NEXT: ] + +# ELF-SRAM-ECC-GFX900: Flags [ +# ELF-SRAM-ECC-GFX900-NEXT: EF_AMDGPU_MACH_AMDGCN_GFX900 (0x2C) +# ELF-SRAM-ECC-GFX900-NEXT: EF_AMDGPU_SRAM_ECC (0x200) +# ELF-SRAM-ECC-GFX900-NEXT: ] + +# ELF-SRAM-ECC-XNACK-GFX900: Flags [ +# ELF-SRAM-ECC-XNACK-GFX900-NEXT: EF_AMDGPU_MACH_AMDGCN_GFX900 (0x2C) +# ELF-SRAM-ECC-XNACK-GFX900-NEXT: EF_AMDGPU_SRAM_ECC (0x200) +# ELF-SRAM-ECC-XNACK-GFX900-NEXT: EF_AMDGPU_XNACK (0x100) +# ELF-SRAM-ECC-XNACK-GFX900-NEXT: ] + +# YAML-SRAM-ECC-NONE: Flags: [ EF_AMDGPU_MACH_NONE, EF_AMDGPU_SRAM_ECC ] +# YAML-SRAM-ECC-GFX900: Flags: [ EF_AMDGPU_MACH_AMDGCN_GFX900, EF_AMDGPU_SRAM_ECC ] +# YAML-SRAM-ECC-XNACK-GFX900: Flags: [ EF_AMDGPU_MACH_AMDGCN_GFX900, EF_AMDGPU_XNACK, EF_AMDGPU_SRAM_ECC ] + +# Doc1 +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + OSABI: ELFOSABI_NONE 
+ Type: ET_REL + Machine: EM_AMDGPU + Flags: [ EF_AMDGPU_SRAM_ECC ] +... + +# Doc2 +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + OSABI: ELFOSABI_NONE + Type: ET_REL + Machine: EM_AMDGPU + Flags: [ EF_AMDGPU_MACH_AMDGCN_GFX900, EF_AMDGPU_SRAM_ECC ] +... + +# Doc3 +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + OSABI: ELFOSABI_NONE + Type: ET_REL + Machine: EM_AMDGPU + Flags: [ EF_AMDGPU_MACH_AMDGCN_GFX900, EF_AMDGPU_XNACK, EF_AMDGPU_SRAM_ECC ] +... Index: tools/llvm-readobj/ELFDumper.cpp =================================================================== --- tools/llvm-readobj/ELFDumper.cpp +++ tools/llvm-readobj/ELFDumper.cpp @@ -1325,7 +1325,8 @@ LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX902), LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX904), LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX906), - LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_XNACK) + LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_XNACK), + LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_SRAM_ECC) }; static const EnumEntry ElfHeaderRISCVFlags[] = {