Index: lib/Target/AMDGPU/AMDGPU.td =================================================================== --- lib/Target/AMDGPU/AMDGPU.td +++ lib/Target/AMDGPU/AMDGPU.td @@ -108,6 +108,12 @@ "Has v_fma_mix_f32, v_fma_mixlo_f16, v_fma_mixhi_f16 instructions" >; +def FeatureDoesNotSupportXNACK : SubtargetFeature<"no-xnack-support", + "DoesNotSupportXNACK", + "true", + "Hardware does not support XNACK" +>; + // XNACK is disabled if SH_MEM_CONFIG.ADDRESS_MODE = GPUVM on chips that support // XNACK. The current default kernel driver setting is: // - graphics ring: XNACK disabled @@ -560,7 +566,7 @@ [FeatureFP64, FeatureLocalMemorySize32768, FeatureMIMG_R128, FeatureWavefrontSize64, FeatureLDSBankCount32, FeatureMovrel, FeatureTrigReducedRange, - FeatureDoesNotSupportSRAMECC] + FeatureDoesNotSupportSRAMECC, FeatureDoesNotSupportXNACK] >; def FeatureSeaIslands : GCNSubtargetFeatureGeneration<"SEA_ISLANDS", @@ -624,16 +630,19 @@ FeatureFastFMAF32, HalfRate64Ops, FeatureLDSBankCount32, + FeatureDoesNotSupportXNACK, FeatureCodeObjectV3]>; def FeatureISAVersion6_0_1 : FeatureSet< [FeatureSouthernIslands, FeatureLDSBankCount32, + FeatureDoesNotSupportXNACK, FeatureCodeObjectV3]>; def FeatureISAVersion7_0_0 : FeatureSet< [FeatureSeaIslands, FeatureLDSBankCount32, + FeatureDoesNotSupportXNACK, FeatureCodeObjectV3]>; def FeatureISAVersion7_0_1 : FeatureSet< @@ -641,22 +650,26 @@ HalfRate64Ops, FeatureLDSBankCount32, FeatureFastFMAF32, + FeatureDoesNotSupportXNACK, FeatureCodeObjectV3]>; def FeatureISAVersion7_0_2 : FeatureSet< [FeatureSeaIslands, FeatureLDSBankCount16, FeatureFastFMAF32, + FeatureDoesNotSupportXNACK, FeatureCodeObjectV3]>; def FeatureISAVersion7_0_3 : FeatureSet< [FeatureSeaIslands, FeatureLDSBankCount16, + FeatureDoesNotSupportXNACK, FeatureCodeObjectV3]>; def FeatureISAVersion7_0_4 : FeatureSet< [FeatureSeaIslands, FeatureLDSBankCount32, + FeatureDoesNotSupportXNACK, FeatureCodeObjectV3]>; def FeatureISAVersion8_0_1 : FeatureSet< @@ -673,12 +686,14 @@ 
FeatureLDSBankCount32, FeatureSGPRInitBug, FeatureUnpackedD16VMem, + FeatureDoesNotSupportXNACK, FeatureCodeObjectV3]>; def FeatureISAVersion8_0_3 : FeatureSet< [FeatureVolcanicIslands, FeatureLDSBankCount32, FeatureUnpackedD16VMem, + FeatureDoesNotSupportXNACK, FeatureCodeObjectV3]>; def FeatureISAVersion8_1_0 : FeatureSet< @@ -692,6 +707,7 @@ FeatureMadMixInsts, FeatureLDSBankCount32, FeatureCodeObjectV3, + FeatureDoesNotSupportXNACK, FeatureDoesNotSupportSRAMECC]>; def FeatureISAVersion9_0_2 : FeatureSet< @@ -706,6 +722,7 @@ [FeatureGFX9, FeatureLDSBankCount32, FeatureFmaMixInsts, + FeatureDoesNotSupportXNACK, FeatureDoesNotSupportSRAMECC, FeatureCodeObjectV3]>; @@ -717,6 +734,7 @@ FeatureDLInsts, FeatureDot1Insts, FeatureDot2Insts, + FeatureDoesNotSupportXNACK, FeatureCodeObjectV3]>; def FeatureISAVersion9_0_9 : FeatureSet< @@ -752,6 +770,7 @@ FeatureScalarAtomics, FeatureScalarFlatScratchInsts, FeatureLdsMisalignedBug, + FeatureDoesNotSupportXNACK, FeatureCodeObjectV3])>; //===----------------------------------------------------------------------===// Index: lib/Target/AMDGPU/AMDGPUSubtarget.h =================================================================== --- lib/Target/AMDGPU/AMDGPUSubtarget.h +++ lib/Target/AMDGPU/AMDGPUSubtarget.h @@ -294,6 +294,7 @@ bool UnalignedBufferAccess; bool HasApertureRegs; bool EnableXNACK; + bool DoesNotSupportXNACK; bool EnableCuMode; bool TrapHandler; Index: lib/Target/AMDGPU/AMDGPUSubtarget.cpp =================================================================== --- lib/Target/AMDGPU/AMDGPUSubtarget.cpp +++ lib/Target/AMDGPU/AMDGPUSubtarget.cpp @@ -78,7 +78,7 @@ // unset everything else if it is disabled // Assuming ECC is enabled is the conservative default. - SmallString<256> FullFS("+promote-alloca,+load-store-opt,+sram-ecc,"); + SmallString<256> FullFS("+promote-alloca,+load-store-opt,+sram-ecc,+xnack,"); if (isAmdHsaOS()) // Turn on FlatForGlobal for HSA. 
FullFS += "+flat-for-global,+unaligned-buffer-access,+trap-handler,"; @@ -130,6 +130,11 @@ HasFminFmaxLegacy = getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS; + if (DoesNotSupportXNACK && EnableXNACK) { + ToggleFeature(AMDGPU::FeatureXNACK); + EnableXNACK = false; + } + // ECC is on by default, but turn it off if the hardware doesn't support it // anyway. This matters for the gfx9 targets with d16 loads, but don't support // ECC. @@ -181,6 +186,7 @@ HasApertureRegs(false), EnableXNACK(false), + DoesNotSupportXNACK(false), EnableCuMode(false), TrapHandler(false), Index: test/CodeGen/AMDGPU/break-vmem-soft-clauses.mir =================================================================== --- test/CodeGen/AMDGPU/break-vmem-soft-clauses.mir +++ test/CodeGen/AMDGPU/break-vmem-soft-clauses.mir @@ -1,4 +1,7 @@ # RUN: llc -march=amdgcn -mcpu=carrizo -verify-machineinstrs -run-pass post-RA-hazard-rec %s -o - | FileCheck -check-prefixes=GCN,XNACK %s + +# Make sure xnack is assumed to be enabled by default when no cpu is specified +# RUN: llc -march=amdgcn -verify-machineinstrs -mattr=+volcanic-islands -run-pass post-RA-hazard-rec %s -o - | FileCheck -check-prefixes=GCN,XNACK %s # RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs -run-pass post-RA-hazard-rec %s -o - | FileCheck -check-prefixes=GCN,NOXNACK %s --- # Trivial clause at beginning of program Index: test/CodeGen/AMDGPU/spill-cfg-position.ll =================================================================== --- test/CodeGen/AMDGPU/spill-cfg-position.ll +++ test/CodeGen/AMDGPU/spill-cfg-position.ll @@ -1,4 +1,4 @@ -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -verify-machineinstrs -stress-regalloc=6 < %s | FileCheck %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -verify-machineinstrs -stress-regalloc=6 < %s | FileCheck %s ; Inline spiller can decide to move a spill as early as possible in the basic block. 
; It will skip phis and label, but we also need to make sure it skips instructions Index: test/Transforms/Inline/AMDGPU/inline-target-feature-xnack.ll =================================================================== --- /dev/null +++ test/Transforms/Inline/AMDGPU/inline-target-feature-xnack.ll @@ -0,0 +1,67 @@ +; RUN: opt -mtriple=amdgcn-amd-amdhsa -S -inline < %s | FileCheck %s +; RUN: opt -mtriple=amdgcn-amd-amdhsa -S -passes='cgscc(inline)' < %s | FileCheck %s + +define i32 @func_default() #0 { + ret i32 0 +} + +define i32 @func_xnack_enabled() #1 { + ret i32 0 +} + +define i32 @func_xnack_disabled() #2 { + ret i32 0 +} + +; CHECK-LABEL: @default_call_default( +; CHECK-NEXT: ret i32 0 +define i32 @default_call_default() #0 { + %call = call i32 @func_default() + ret i32 %call +} + +; CHECK-LABEL: @xnack_enabled_call_default( +; CHECK-NEXT: ret i32 0 +define i32 @xnack_enabled_call_default() #1 { + %call = call i32 @func_default() + ret i32 %call +} + +; CHECK-LABEL: @xnack_enabled_call_xnack_enabled( +; CHECK-NEXT: ret i32 0 +define i32 @xnack_enabled_call_xnack_enabled() #1 { + %call = call i32 @func_xnack_enabled() + ret i32 %call +} + +; CHECK-LABEL: @xnack_enabled_call_xnack_disabled( +; CHECK-NEXT: ret i32 0 +define i32 @xnack_enabled_call_xnack_disabled() #1 { + %call = call i32 @func_xnack_disabled() + ret i32 %call +} + +; CHECK-LABEL: @xnack_disabled_call_default( +; CHECK-NEXT: ret i32 0 +define i32 @xnack_disabled_call_default() #2 { + %call = call i32 @func_default() + ret i32 %call +} + +; CHECK-LABEL: @xnack_disabled_call_xnack_enabled( +; CHECK-NEXT: ret i32 0 +define i32 @xnack_disabled_call_xnack_enabled() #2 { + %call = call i32 @func_xnack_enabled() + ret i32 %call +} + +; CHECK-LABEL: @xnack_disabled_call_xnack_disabled( +; CHECK-NEXT: ret i32 0 +define i32 @xnack_disabled_call_xnack_disabled() #2 { + %call = call i32 @func_xnack_disabled() + ret i32 %call +} + +attributes #0 = { nounwind } +attributes #1 = { nounwind 
"target-features"="+xnack" } +attributes #2 = { nounwind "target-features"="-xnack" }