Index: lib/Target/AMDGPU/AMDGPU.td =================================================================== --- lib/Target/AMDGPU/AMDGPU.td +++ lib/Target/AMDGPU/AMDGPU.td @@ -281,6 +281,12 @@ "Has v_dot2_f32_f16, v_dot2_i32_i16, v_dot2_u32_u16, v_dot4_u32_u8, v_dot8_u32_u4 instructions" >; +def FeatureDoesNotSupportSRAMECC : SubtargetFeature<"no-sram-ecc-support", + "DoesNotSupportSRAMECC", + "true", + "Hardware does not support SRAM ECC" +>; + def FeatureSRAMECC : SubtargetFeature<"sram-ecc", "EnableSRAMECC", "true", @@ -437,13 +443,15 @@ def FeatureSouthernIslands : GCNSubtargetFeatureGeneration<"SOUTHERN_ISLANDS", [FeatureFP64, FeatureLocalMemorySize32768, FeatureMIMG_R128, FeatureWavefrontSize64, - FeatureLDSBankCount32, FeatureMovrel, FeatureTrigReducedRange] + FeatureLDSBankCount32, FeatureMovrel, FeatureTrigReducedRange, + FeatureDoesNotSupportSRAMECC] >; def FeatureSeaIslands : GCNSubtargetFeatureGeneration<"SEA_ISLANDS", [FeatureFP64, FeatureLocalMemorySize65536, FeatureMIMG_R128, FeatureWavefrontSize64, FeatureFlatAddressSpace, - FeatureCIInsts, FeatureMovrel, FeatureTrigReducedRange] + FeatureCIInsts, FeatureMovrel, FeatureTrigReducedRange, + FeatureDoesNotSupportSRAMECC] >; def FeatureVolcanicIslands : GCNSubtargetFeatureGeneration<"VOLCANIC_ISLANDS", @@ -453,7 +461,7 @@ FeatureSMemRealTime, FeatureVGPRIndexMode, FeatureMovrel, FeatureScalarStores, FeatureInv2PiInlineImm, FeatureSDWA, FeatureSDWAOutModsVOPC, FeatureSDWAMac, FeatureDPP, - FeatureIntClamp, FeatureTrigReducedRange + FeatureIntClamp, FeatureTrigReducedRange, FeatureDoesNotSupportSRAMECC ] >; @@ -545,19 +553,22 @@ [FeatureGFX9, FeatureMadMixInsts, FeatureLDSBankCount32, - FeatureCodeObjectV3]>; + FeatureCodeObjectV3, + FeatureDoesNotSupportSRAMECC]>; def FeatureISAVersion9_0_2 : FeatureSet< [FeatureGFX9, FeatureMadMixInsts, FeatureLDSBankCount32, FeatureXNACK, + FeatureDoesNotSupportSRAMECC, FeatureCodeObjectV3]>; def FeatureISAVersion9_0_4 : FeatureSet< [FeatureGFX9, FeatureLDSBankCount32, 
FeatureFmaMixInsts, + FeatureDoesNotSupportSRAMECC, FeatureCodeObjectV3]>; def FeatureISAVersion9_0_6 : FeatureSet< Index: lib/Target/AMDGPU/AMDGPUSubtarget.h =================================================================== --- lib/Target/AMDGPU/AMDGPUSubtarget.h +++ lib/Target/AMDGPU/AMDGPUSubtarget.h @@ -332,6 +332,7 @@ bool HasDot1Insts; bool HasDot2Insts; bool EnableSRAMECC; + bool DoesNotSupportSRAMECC; bool FlatAddressSpace; bool FlatInstOffsets; bool FlatGlobalInsts; Index: lib/Target/AMDGPU/AMDGPUSubtarget.cpp =================================================================== --- lib/Target/AMDGPU/AMDGPUSubtarget.cpp +++ lib/Target/AMDGPU/AMDGPUSubtarget.cpp @@ -64,7 +64,7 @@ GCNSubtarget & GCNSubtarget::initializeSubtargetDependencies(const Triple &TT, - StringRef GPU, StringRef FS) { + StringRef GPU, StringRef FS) { // Determine default and user-specified characteristics // On SI+, we want FP64 denormals to be on by default. FP32 denormals can be // enabled, but some instructions do not respect them and they run at the @@ -77,7 +77,8 @@ // Similarly we want enable-prt-strict-null to be on by default and not to // unset everything else if it is disabled - SmallString<256> FullFS("+promote-alloca,+load-store-opt,"); + // Assuming ECC is enabled is the conservative default. + SmallString<256> FullFS("+promote-alloca,+load-store-opt,+sram-ecc,"); if (isAmdHsaOS()) // Turn on FlatForGlobal for HSA. FullFS += "+flat-for-global,+unaligned-buffer-access,+trap-handler,"; @@ -129,6 +130,14 @@ HasFminFmaxLegacy = getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS; + // ECC is on by default, but turn it off if the hardware doesn't support it + // anyway. This matters for the gfx9 targets that have d16 loads but don't + // support ECC. 
+ if (DoesNotSupportSRAMECC && EnableSRAMECC) { + ToggleFeature(AMDGPU::FeatureSRAMECC); + EnableSRAMECC = false; + } + return *this; } @@ -206,6 +215,7 @@ HasDot1Insts(false), HasDot2Insts(false), EnableSRAMECC(false), + DoesNotSupportSRAMECC(false), FlatAddressSpace(false), FlatInstOffsets(false), FlatGlobalInsts(false), Index: lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h =================================================================== --- lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h +++ lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h @@ -84,6 +84,10 @@ AMDGPU::FeatureTrapHandler, AMDGPU::FeatureCodeObjectV3, + // The default assumption needs to be that ECC is enabled, but no directly + // exposed operations depend on it, so it is safe to ignore when inlining. + AMDGPU::FeatureSRAMECC, + // Perf-tuning features AMDGPU::FeatureFastFMAF32, AMDGPU::HalfRate64Ops Index: test/CodeGen/AMDGPU/load-hi16.ll =================================================================== --- test/CodeGen/AMDGPU/load-hi16.ll +++ test/CodeGen/AMDGPU/load-hi16.ll @@ -1,5 +1,5 @@ ; RUN: llc -march=amdgcn -mcpu=gfx900 -amdgpu-sroa=0 -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX900 %s -; RUN: llc -march=amdgcn -mcpu=gfx906 -amdgpu-sroa=0 -mattr=-promote-alloca,+sram-ecc -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX906,NO-D16-HI %s +; RUN: llc -march=amdgcn -mcpu=gfx906 -amdgpu-sroa=0 -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX906,NO-D16-HI %s ; RUN: llc -march=amdgcn -mcpu=fiji -amdgpu-sroa=0 -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX803,NO-D16-HI %s ; GCN-LABEL: {{^}}load_local_lo_hi_v2i16_multi_use_lo: Index: test/CodeGen/AMDGPU/sram-ecc-default.ll =================================================================== --- /dev/null +++ test/CodeGen/AMDGPU/sram-ecc-default.ll @@ -0,0 +1,24 @@ +; RUN: llc -march=amdgcn -mcpu=gfx900 < %s | FileCheck 
-check-prefixes=GCN,NO-ECC %s +; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=+sram-ecc < %s | FileCheck -check-prefixes=GCN,NO-ECC %s +; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-sram-ecc < %s | FileCheck -check-prefixes=GCN,NO-ECC %s +; RUN: llc -march=amdgcn -mcpu=gfx902 -mattr=+sram-ecc < %s | FileCheck -check-prefixes=GCN,NO-ECC %s +; RUN: llc -march=amdgcn -mcpu=gfx904 -mattr=+sram-ecc < %s | FileCheck -check-prefixes=GCN,NO-ECC %s +; RUN: llc -march=amdgcn -mcpu=gfx906 -mattr=+sram-ecc < %s | FileCheck -check-prefixes=GCN,ECC %s +; RUN: llc -march=amdgcn -mcpu=gfx906 -mattr=-sram-ecc < %s | FileCheck -check-prefixes=GCN,NO-ECC %s + +; Make sure the correct set of targets are marked with +; FeatureDoesNotSupportSRAMECC, and +sram-ecc is ignored if it's never +; supported. + +; GCN-LABEL: {{^}}load_global_hi_v2i16_reglo_vreg: +; NO-ECC: global_load_short_d16_hi +; ECC: global_load_ushort +define void @load_global_hi_v2i16_reglo_vreg(i16 addrspace(1)* %in, i16 %reg) { +entry: + %gep = getelementptr inbounds i16, i16 addrspace(1)* %in, i64 -2047 + %load = load i16, i16 addrspace(1)* %gep + %build0 = insertelement <2 x i16> undef, i16 %reg, i32 0 + %build1 = insertelement <2 x i16> %build0, i16 %load, i32 1 + store <2 x i16> %build1, <2 x i16> addrspace(1)* undef + ret void +} Index: test/Transforms/Inline/AMDGPU/inline-target-feature-sram-ecc.ll =================================================================== --- /dev/null +++ test/Transforms/Inline/AMDGPU/inline-target-feature-sram-ecc.ll @@ -0,0 +1,70 @@ +; RUN: opt -mtriple=amdgcn-amd-amdhsa -S -inline < %s | FileCheck %s +; RUN: opt -mtriple=amdgcn-amd-amdhsa -S -passes='cgscc(inline)' < %s | FileCheck %s + +; sram-ecc can be safely ignored when inlining, since no intrinsics +; or other directly exposed operations depend on it. 
+ +define i32 @func_default() #0 { + ret i32 0 +} + +define i32 @func_ecc_enabled() #1 { + ret i32 0 +} + +define i32 @func_ecc_disabled() #2 { + ret i32 0 +} + +; CHECK-LABEL: @default_call_default( +; CHECK-NEXT: ret i32 0 +define i32 @default_call_default() #0 { + %call = call i32 @func_default() + ret i32 %call +} + +; CHECK-LABEL: @ecc_enabled_call_default( +; CHECK-NEXT: ret i32 0 +define i32 @ecc_enabled_call_default() #1 { + %call = call i32 @func_default() + ret i32 %call +} + +; CHECK-LABEL: @ecc_enabled_call_ecc_enabled( +; CHECK-NEXT: ret i32 0 +define i32 @ecc_enabled_call_ecc_enabled() #1 { + %call = call i32 @func_ecc_enabled() + ret i32 %call +} + +; CHECK-LABEL: @ecc_enabled_call_ecc_disabled( +; CHECK-NEXT: ret i32 0 +define i32 @ecc_enabled_call_ecc_disabled() #1 { + %call = call i32 @func_ecc_disabled() + ret i32 %call +} + +; CHECK-LABEL: @ecc_disabled_call_default( +; CHECK-NEXT: ret i32 0 +define i32 @ecc_disabled_call_default() #2 { + %call = call i32 @func_default() + ret i32 %call +} + +; CHECK-LABEL: @ecc_disabled_call_ecc_enabled( +; CHECK-NEXT: ret i32 0 +define i32 @ecc_disabled_call_ecc_enabled() #2 { + %call = call i32 @func_ecc_enabled() + ret i32 %call +} + +; CHECK-LABEL: @ecc_disabled_call_ecc_disabled( +; CHECK-NEXT: ret i32 0 +define i32 @ecc_disabled_call_ecc_disabled() #2 { + %call = call i32 @func_ecc_disabled() + ret i32 %call +} + +attributes #0 = { nounwind } +attributes #1 = { nounwind "target-features"="+sram-ecc" } +attributes #2 = { nounwind "target-features"="-sram-ecc" }