Index: llvm/trunk/docs/AMDGPUUsage.rst =================================================================== --- llvm/trunk/docs/AMDGPUUsage.rst +++ llvm/trunk/docs/AMDGPUUsage.rst @@ -207,6 +207,8 @@ names. ``gfx906`` ``amdgcn`` dGPU - xnack *TBA* [off] + sram-ecc + [on] .. TODO Add product names. @@ -246,24 +248,26 @@ .. table:: AMDGPU Target Features :name: amdgpu-target-feature-table - ============== ================================================== - Target Feature Description - ============== ================================================== - -m[no-]xnack Enable/disable generating code that has - memory clauses that are compatible with - having XNACK replay enabled. - - This is used for demand paging and page - migration. If XNACK replay is enabled in - the device, then if a page fault occurs - the code may execute incorrectly if the - ``xnack`` feature is not enabled. Executing - code that has the feature enabled on a - device that does not have XNACK replay - enabled will execute correctly, but may - be less performant than code with the - feature disabled. - ============== ================================================== + =============== ================================================== + Target Feature Description + =============== ================================================== + -m[no-]xnack Enable/disable generating code that has + memory clauses that are compatible with + having XNACK replay enabled. + + This is used for demand paging and page + migration. If XNACK replay is enabled in + the device, then if a page fault occurs + the code may execute incorrectly if the + ``xnack`` feature is not enabled. Executing + code that has the feature enabled on a + device that does not have XNACK replay + enabled will execute correctly, but may + be less performant than code with the + feature disabled. + -m[no-]sram-ecc Enable/disable generating code that assumes SRAM + ECC is enabled/disabled. 
+ =============== ================================================== .. _amdgpu-address-spaces: @@ -549,6 +553,17 @@ be 0. See :ref:`amdgpu-target-features`. + ``EF_AMDGPU_SRAM_ECC`` 0x00000200 Indicates if the ``sram-ecc`` + target feature is + enabled for all code + contained in the code object. + If the processor + does not support the + ``sram-ecc`` target + feature then must + be 0. + See + :ref:`amdgpu-target-features`. ================================= ========== ============================= .. table:: AMDGPU ``EF_AMDGPU_MACH`` Values Index: llvm/trunk/include/llvm/BinaryFormat/ELF.h =================================================================== --- llvm/trunk/include/llvm/BinaryFormat/ELF.h +++ llvm/trunk/include/llvm/BinaryFormat/ELF.h @@ -711,9 +711,12 @@ EF_AMDGPU_MACH_AMDGCN_FIRST = EF_AMDGPU_MACH_AMDGCN_GFX600, EF_AMDGPU_MACH_AMDGCN_LAST = EF_AMDGPU_MACH_AMDGCN_GFX909, - // Indicates if the xnack target feature is enabled for all code contained in - // the object. + // Indicates if the "xnack" target feature is enabled for all code contained + // in the object. EF_AMDGPU_XNACK = 0x100, + // Indicates if the "sram-ecc" target feature is enabled for all code + // contained in the object. 
+ EF_AMDGPU_SRAM_ECC = 0x200, }; // ELF Relocation types for AMDGPU Index: llvm/trunk/lib/ObjectYAML/ELFYAML.cpp =================================================================== --- llvm/trunk/lib/ObjectYAML/ELFYAML.cpp +++ llvm/trunk/lib/ObjectYAML/ELFYAML.cpp @@ -404,6 +404,7 @@ BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX906, EF_AMDGPU_MACH); BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX909, EF_AMDGPU_MACH); BCase(EF_AMDGPU_XNACK); + BCase(EF_AMDGPU_SRAM_ECC); break; case ELF::EM_X86_64: break; Index: llvm/trunk/lib/Target/AMDGPU/AMDGPU.td =================================================================== --- llvm/trunk/lib/Target/AMDGPU/AMDGPU.td +++ llvm/trunk/lib/Target/AMDGPU/AMDGPU.td @@ -266,13 +266,10 @@ "Has deep learning instructions" >; -def FeatureD16PreservesUnusedBits : SubtargetFeature< - "d16-preserves-unused-bits", - "D16PreservesUnusedBits", - "true", - "If present, then instructions defined by HasD16LoadStore predicate preserve " - "unused bits. Otherwise instructions defined by HasD16LoadStore predicate " - "zero unused bits." 
+def FeatureSRAMECC : SubtargetFeature<"sram-ecc", + "EnableSRAMECC", + "true", + "Enable SRAM ECC" >; //===------------------------------------------------------------===// @@ -524,35 +521,32 @@ def FeatureISAVersion9_0_0 : SubtargetFeatureISAVersion <9,0,0, [FeatureGFX9, FeatureMadMixInsts, - FeatureLDSBankCount32, - FeatureD16PreservesUnusedBits]>; + FeatureLDSBankCount32]>; def FeatureISAVersion9_0_2 : SubtargetFeatureISAVersion <9,0,2, [FeatureGFX9, FeatureMadMixInsts, FeatureLDSBankCount32, - FeatureXNACK, - FeatureD16PreservesUnusedBits]>; + FeatureXNACK]>; def FeatureISAVersion9_0_4 : SubtargetFeatureISAVersion <9,0,4, [FeatureGFX9, FeatureLDSBankCount32, - FeatureFmaMixInsts, - FeatureD16PreservesUnusedBits]>; + FeatureFmaMixInsts]>; def FeatureISAVersion9_0_6 : SubtargetFeatureISAVersion <9,0,6, [FeatureGFX9, HalfRate64Ops, FeatureFmaMixInsts, FeatureLDSBankCount32, - FeatureDLInsts]>; + FeatureDLInsts, + FeatureSRAMECC]>; def FeatureISAVersion9_0_9 : SubtargetFeatureISAVersion <9,0,9, [FeatureGFX9, FeatureMadMixInsts, FeatureLDSBankCount32, - FeatureXNACK, - FeatureD16PreservesUnusedBits]>; + FeatureXNACK]>; //===----------------------------------------------------------------------===// // Debugger related subtarget features. 
@@ -684,8 +678,9 @@ def HasPackedD16VMem : Predicate<"!Subtarget->hasUnpackedD16VMem()">, AssemblerPredicate<"!FeatureUnpackedD16VMem">; -def D16PreservesUnusedBits : Predicate<"Subtarget->d16PreservesUnusedBits()">, - AssemblerPredicate<"FeatureD16PreservesUnusedBits">; +def D16PreservesUnusedBits : + Predicate<"Subtarget->hasD16LoadStore() && !Subtarget->isSRAMECCEnabled()">, + AssemblerPredicate<"FeatureGFX9Insts,!FeatureSRAMECC">; def LDSRequiresM0Init : Predicate<"Subtarget->ldsRequiresM0Init()">; def NotLDSRequiresM0Init : Predicate<"!Subtarget->ldsRequiresM0Init()">; Index: llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.h =================================================================== --- llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.h +++ llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.h @@ -353,7 +353,7 @@ bool HasDPP; bool HasR128A16; bool HasDLInsts; - bool D16PreservesUnusedBits; + bool EnableSRAMECC; bool FlatAddressSpace; bool FlatInstOffsets; bool FlatGlobalInsts; @@ -679,8 +679,8 @@ return HasDLInsts; } - bool d16PreservesUnusedBits() const { - return D16PreservesUnusedBits; + bool isSRAMECCEnabled() const { + return EnableSRAMECC; } // Scratch is allocated in 256 dword per wave blocks for the entire Index: llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.cpp =================================================================== --- llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.cpp +++ llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.cpp @@ -198,7 +198,7 @@ HasDPP(false), HasR128A16(false), HasDLInsts(false), - D16PreservesUnusedBits(false), + EnableSRAMECC(false), FlatAddressSpace(false), FlatInstOffsets(false), FlatGlobalInsts(false), Index: llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp =================================================================== --- llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp +++ llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp @@ -347,6 +347,10 @@ if (AMDGPU::hasXNACK(STI)) 
EFlags |= ELF::EF_AMDGPU_XNACK; + EFlags &= ~ELF::EF_AMDGPU_SRAM_ECC; + if (AMDGPU::hasSRAMECC(STI)) + EFlags |= ELF::EF_AMDGPU_SRAM_ECC; + MCA.setELFHeaderEFlags(EFlags); } Index: llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h =================================================================== --- llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h +++ llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h @@ -342,6 +342,7 @@ } bool hasXNACK(const MCSubtargetInfo &STI); +bool hasSRAMECC(const MCSubtargetInfo &STI); bool hasMIMG_R128(const MCSubtargetInfo &STI); bool hasPackedD16(const MCSubtargetInfo &STI); Index: llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp =================================================================== --- llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp +++ llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp @@ -152,6 +152,8 @@ if (hasXNACK(*STI)) Stream << "+xnack"; + if (hasSRAMECC(*STI)) + Stream << "+sram-ecc"; Stream.flush(); } @@ -593,6 +595,10 @@ return STI.getFeatureBits()[AMDGPU::FeatureXNACK]; } +bool hasSRAMECC(const MCSubtargetInfo &STI) { + return STI.getFeatureBits()[AMDGPU::FeatureSRAMECC]; +} + bool hasMIMG_R128(const MCSubtargetInfo &STI) { return STI.getFeatureBits()[AMDGPU::FeatureMIMG_R128]; } Index: llvm/trunk/test/CodeGen/AMDGPU/directive-amdgcn-target.ll =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/directive-amdgcn-target.ll +++ llvm/trunk/test/CodeGen/AMDGPU/directive-amdgcn-target.ll @@ -34,6 +34,12 @@ ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=+code-object-v3,+xnack < %s | FileCheck --check-prefixes=XNACK-GFX900 %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx902 -mattr=+code-object-v3,-xnack < %s | FileCheck --check-prefixes=NO-XNACK-GFX902 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx904 -mattr=+code-object-v3,+sram-ecc < %s | FileCheck --check-prefixes=SRAM-ECC-GFX904 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa 
-mcpu=gfx906 -mattr=+code-object-v3,-sram-ecc < %s | FileCheck --check-prefixes=NO-SRAM-ECC-GFX906 %s + +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx904 -mattr=+code-object-v3,+sram-ecc,+xnack < %s | FileCheck --check-prefixes=SRAM-ECC-XNACK-GFX904 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -mattr=+code-object-v3,+xnack < %s | FileCheck --check-prefixes=XNACK-GFX906 %s + ; GFX600: .amdgcn_target "amdgcn-amd-amdhsa--gfx600" ; GFX601: .amdgcn_target "amdgcn-amd-amdhsa--gfx601" ; GFX700: .amdgcn_target "amdgcn-amd-amdhsa--gfx700" @@ -48,10 +54,16 @@ ; GFX900: .amdgcn_target "amdgcn-amd-amdhsa--gfx900" ; GFX902: .amdgcn_target "amdgcn-amd-amdhsa--gfx902+xnack" ; GFX904: .amdgcn_target "amdgcn-amd-amdhsa--gfx904" -; GFX906: .amdgcn_target "amdgcn-amd-amdhsa--gfx906" +; GFX906: .amdgcn_target "amdgcn-amd-amdhsa--gfx906+sram-ecc" ; XNACK-GFX900: .amdgcn_target "amdgcn-amd-amdhsa--gfx900+xnack" -; NO-XNACK-GFX902: .amdgcn_target "amdgcn-amd-amdhsa--gfx902 +; NO-XNACK-GFX902: .amdgcn_target "amdgcn-amd-amdhsa--gfx902" + +; SRAM-ECC-GFX904: .amdgcn_target "amdgcn-amd-amdhsa--gfx904+sram-ecc" +; NO-SRAM-ECC-GFX906: .amdgcn_target "amdgcn-amd-amdhsa--gfx906" + +; SRAM-ECC-XNACK-GFX904: .amdgcn_target "amdgcn-amd-amdhsa--gfx904+xnack+sram-ecc" +; XNACK-GFX906: .amdgcn_target "amdgcn-amd-amdhsa--gfx906+xnack+sram-ecc" define amdgpu_kernel void @directive_amdgcn_target() { ret void Index: llvm/trunk/test/CodeGen/AMDGPU/elf-header-flags-mach.ll =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/elf-header-flags-mach.ll +++ llvm/trunk/test/CodeGen/AMDGPU/elf-header-flags-mach.ll @@ -86,6 +86,7 @@ ; GFX902-NEXT: EF_AMDGPU_XNACK (0x100) ; GFX904: EF_AMDGPU_MACH_AMDGCN_GFX904 (0x2E) ; GFX906: EF_AMDGPU_MACH_AMDGCN_GFX906 (0x2F) +; GFX906-NEXT: EF_AMDGPU_SRAM_ECC (0x200) ; GFX909: EF_AMDGPU_MACH_AMDGCN_GFX909 (0x31) ; ALL: ] Index: llvm/trunk/test/CodeGen/AMDGPU/elf-header-flags-sram-ecc.ll 
=================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/elf-header-flags-sram-ecc.ll +++ llvm/trunk/test/CodeGen/AMDGPU/elf-header-flags-sram-ecc.ll @@ -0,0 +1,38 @@ +; RUN: llc -filetype=obj -march=amdgcn -mcpu=gfx902 < %s | llvm-readobj -file-headers - | FileCheck --check-prefixes=NO-SRAM-ECC-GFX902 %s +; RUN: llc -filetype=obj -march=amdgcn -mcpu=gfx902 -mattr=-sram-ecc < %s | llvm-readobj -file-headers - | FileCheck --check-prefixes=NO-SRAM-ECC-GFX902 %s +; RUN: llc -filetype=obj -march=amdgcn -mcpu=gfx902 -mattr=+sram-ecc < %s | llvm-readobj -file-headers - | FileCheck --check-prefixes=SRAM-ECC-GFX902 %s + +; RUN: llc -filetype=obj -march=amdgcn -mcpu=gfx906 < %s | llvm-readobj -file-headers - | FileCheck --check-prefixes=SRAM-ECC-GFX906 %s +; RUN: llc -filetype=obj -march=amdgcn -mcpu=gfx906 -mattr=-sram-ecc < %s | llvm-readobj -file-headers - | FileCheck --check-prefixes=NO-SRAM-ECC-GFX906 %s +; RUN: llc -filetype=obj -march=amdgcn -mcpu=gfx906 -mattr=+sram-ecc < %s | llvm-readobj -file-headers - | FileCheck --check-prefixes=SRAM-ECC-GFX906 %s +; RUN: llc -filetype=obj -march=amdgcn -mcpu=gfx906 -mattr=+sram-ecc,+xnack < %s | llvm-readobj -file-headers - | FileCheck --check-prefixes=SRAM-ECC-XNACK-GFX906 %s + +; NO-SRAM-ECC-GFX902: Flags [ +; NO-SRAM-ECC-GFX902-NEXT: EF_AMDGPU_MACH_AMDGCN_GFX902 (0x2D) +; NO-SRAM-ECC-GFX902-NEXT: EF_AMDGPU_XNACK (0x100) +; NO-SRAM-ECC-GFX902-NEXT: ] + +; SRAM-ECC-GFX902: Flags [ +; SRAM-ECC-GFX902-NEXT: EF_AMDGPU_MACH_AMDGCN_GFX902 (0x2D) +; SRAM-ECC-GFX902-NEXT: EF_AMDGPU_SRAM_ECC (0x200) +; SRAM-ECC-GFX902-NEXT: EF_AMDGPU_XNACK (0x100) +; SRAM-ECC-GFX902-NEXT: ] + +; NO-SRAM-ECC-GFX906: Flags [ +; NO-SRAM-ECC-GFX906-NEXT: EF_AMDGPU_MACH_AMDGCN_GFX906 (0x2F) +; NO-SRAM-ECC-GFX906-NEXT: ] + +; SRAM-ECC-GFX906: Flags [ +; SRAM-ECC-GFX906-NEXT: EF_AMDGPU_MACH_AMDGCN_GFX906 (0x2F) +; SRAM-ECC-GFX906-NEXT: EF_AMDGPU_SRAM_ECC (0x200) +; SRAM-ECC-GFX906-NEXT: ] + +; 
SRAM-ECC-XNACK-GFX906: Flags [ +; SRAM-ECC-XNACK-GFX906-NEXT: EF_AMDGPU_MACH_AMDGCN_GFX906 (0x2F) +; SRAM-ECC-XNACK-GFX906-NEXT: EF_AMDGPU_SRAM_ECC (0x200) +; SRAM-ECC-XNACK-GFX906-NEXT: EF_AMDGPU_XNACK (0x100) +; SRAM-ECC-XNACK-GFX906-NEXT: ] + +define amdgpu_kernel void @elf_header() { + ret void +} Index: llvm/trunk/test/Object/AMDGPU/elf-header-flags-sram-ecc.yaml =================================================================== --- llvm/trunk/test/Object/AMDGPU/elf-header-flags-sram-ecc.yaml +++ llvm/trunk/test/Object/AMDGPU/elf-header-flags-sram-ecc.yaml @@ -0,0 +1,61 @@ +# RUN: yaml2obj -docnum=1 %s > %t.o.1 +# RUN: llvm-readobj -s -file-headers %t.o.1 | FileCheck --check-prefixes=ELF-ALL,ELF-SRAM-ECC-NONE %s +# RUN: obj2yaml %t.o.1 | FileCheck --check-prefixes=YAML-SRAM-ECC-NONE %s +# RUN: yaml2obj -docnum=2 %s > %t.o.2 +# RUN: llvm-readobj -s -file-headers %t.o.2 | FileCheck --check-prefixes=ELF-ALL,ELF-SRAM-ECC-GFX900 %s +# RUN: obj2yaml %t.o.2 | FileCheck --check-prefixes=YAML-SRAM-ECC-GFX900 %s +# RUN: yaml2obj -docnum=3 %s > %t.o.3 +# RUN: llvm-readobj -s -file-headers %t.o.3 | FileCheck --check-prefixes=ELF-ALL,ELF-SRAM-ECC-XNACK-GFX900 %s +# RUN: obj2yaml %t.o.3 | FileCheck --check-prefixes=YAML-SRAM-ECC-XNACK-GFX900 %s + +# ELF-SRAM-ECC-NONE: Flags [ +# ELF-SRAM-ECC-NONE-NEXT: EF_AMDGPU_SRAM_ECC (0x200) +# ELF-SRAM-ECC-NONE-NEXT: ] + +# ELF-SRAM-ECC-GFX900: Flags [ +# ELF-SRAM-ECC-GFX900-NEXT: EF_AMDGPU_MACH_AMDGCN_GFX900 (0x2C) +# ELF-SRAM-ECC-GFX900-NEXT: EF_AMDGPU_SRAM_ECC (0x200) +# ELF-SRAM-ECC-GFX900-NEXT: ] + +# ELF-SRAM-ECC-XNACK-GFX900: Flags [ +# ELF-SRAM-ECC-XNACK-GFX900-NEXT: EF_AMDGPU_MACH_AMDGCN_GFX900 (0x2C) +# ELF-SRAM-ECC-XNACK-GFX900-NEXT: EF_AMDGPU_SRAM_ECC (0x200) +# ELF-SRAM-ECC-XNACK-GFX900-NEXT: EF_AMDGPU_XNACK (0x100) +# ELF-SRAM-ECC-XNACK-GFX900-NEXT: ] + +# YAML-SRAM-ECC-NONE: Flags: [ EF_AMDGPU_MACH_NONE, EF_AMDGPU_SRAM_ECC ] +# YAML-SRAM-ECC-GFX900: Flags: [ EF_AMDGPU_MACH_AMDGCN_GFX900, EF_AMDGPU_SRAM_ECC ] +# 
YAML-SRAM-ECC-XNACK-GFX900: Flags: [ EF_AMDGPU_MACH_AMDGCN_GFX900, EF_AMDGPU_XNACK, EF_AMDGPU_SRAM_ECC ] + +# Doc1 +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + OSABI: ELFOSABI_NONE + Type: ET_REL + Machine: EM_AMDGPU + Flags: [ EF_AMDGPU_SRAM_ECC ] +... + +# Doc2 +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + OSABI: ELFOSABI_NONE + Type: ET_REL + Machine: EM_AMDGPU + Flags: [ EF_AMDGPU_MACH_AMDGCN_GFX900, EF_AMDGPU_SRAM_ECC ] +... + +# Doc3 +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + OSABI: ELFOSABI_NONE + Type: ET_REL + Machine: EM_AMDGPU + Flags: [ EF_AMDGPU_MACH_AMDGCN_GFX900, EF_AMDGPU_XNACK, EF_AMDGPU_SRAM_ECC ] +... Index: llvm/trunk/tools/llvm-readobj/ELFDumper.cpp =================================================================== --- llvm/trunk/tools/llvm-readobj/ELFDumper.cpp +++ llvm/trunk/tools/llvm-readobj/ELFDumper.cpp @@ -1355,7 +1355,8 @@ LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX904), LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX906), LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX909), - LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_XNACK) + LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_XNACK), + LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_SRAM_ECC) }; static const EnumEntry ElfHeaderRISCVFlags[] = {