diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td --- a/llvm/lib/Target/AMDGPU/AMDGPU.td +++ b/llvm/lib/Target/AMDGPU/AMDGPU.td @@ -899,6 +899,12 @@ "Enable the architected SGPRs" >; +def FeatureGDS : SubtargetFeature<"gds", + "HasGDS", + "true", + "Has Global Data Share" +>; + // Dummy feature used to disable assembler instructions. def FeatureDisable : SubtargetFeature<"", "FeatureDisable","true", @@ -917,7 +923,8 @@ [FeatureFP64, FeatureLocalMemorySize32768, FeatureMIMG_R128, FeatureWavefrontSize64, FeatureSMemTimeInst, FeatureMadMacF32Insts, FeatureDsSrc2Insts, FeatureLDSBankCount32, FeatureMovrel, - FeatureTrigReducedRange, FeatureExtendedImageInsts, FeatureImageInsts + FeatureTrigReducedRange, FeatureExtendedImageInsts, FeatureImageInsts, + FeatureGDS ] >; @@ -928,7 +935,7 @@ FeatureCIInsts, FeatureMovrel, FeatureTrigReducedRange, FeatureGFX7GFX8GFX9Insts, FeatureSMemTimeInst, FeatureMadMacF32Insts, FeatureDsSrc2Insts, FeatureExtendedImageInsts, FeatureUnalignedBufferAccess, - FeatureImageInsts + FeatureImageInsts, FeatureGDS ] >; @@ -943,7 +950,7 @@ FeatureIntClamp, FeatureTrigReducedRange, FeatureGFX8Insts, FeatureGFX7GFX8GFX9Insts, FeatureSMemTimeInst, FeatureMadMacF32Insts, FeatureDsSrc2Insts, FeatureExtendedImageInsts, FeatureFastDenormalF32, - FeatureUnalignedBufferAccess, FeatureImageInsts + FeatureUnalignedBufferAccess, FeatureImageInsts, FeatureGDS ] >; @@ -961,7 +968,7 @@ FeatureScalarFlatScratchInsts, FeatureScalarAtomics, FeatureR128A16, FeatureA16, FeatureSMemTimeInst, FeatureFastDenormalF32, FeatureSupportsXNACK, FeatureUnalignedBufferAccess, FeatureUnalignedDSAccess, - FeatureNegativeScratchOffsetBug + FeatureNegativeScratchOffsetBug, FeatureGDS ] >; @@ -980,7 +987,8 @@ FeatureVOP3Literal, FeatureDPP8, FeatureExtendedImageInsts, FeatureNoDataDepHazard, FeaturePkFmacF16Inst, FeatureA16, FeatureSMemTimeInst, FeatureFastDenormalF32, FeatureG16, - FeatureUnalignedBufferAccess, FeatureUnalignedDSAccess, FeatureImageInsts + FeatureUnalignedBufferAccess, FeatureUnalignedDSAccess, FeatureImageInsts, + FeatureGDS ] >; @@ -999,7 +1007,7 @@ FeatureVOP3Literal, FeatureDPP8, FeatureExtendedImageInsts, FeatureNoDataDepHazard, FeaturePkFmacF16Inst, FeatureA16, FeatureFastDenormalF32, FeatureG16, - FeatureUnalignedBufferAccess, FeatureUnalignedDSAccess + FeatureUnalignedBufferAccess, FeatureUnalignedDSAccess, FeatureGDS ] >; @@ -1836,6 +1844,8 @@ def HasNotMADIntraFwdBug : Predicate<"!Subtarget->hasMADIntraFwdBug()">; +def HasGDS : Predicate<"Subtarget->hasGDS()">; + // Include AMDGPU TD files include "SISchedule.td" include "GCNProcessors.td" diff --git a/llvm/lib/Target/AMDGPU/DSInstructions.td b/llvm/lib/Target/AMDGPU/DSInstructions.td --- a/llvm/lib/Target/AMDGPU/DSInstructions.td +++ b/llvm/lib/Target/AMDGPU/DSInstructions.td @@ -969,8 +969,10 @@ !cast(frag#"_local_"#vt.Size)>; } - def : DSAtomicRetPat(frag#"_region_m0_"#vt.Size), - /* complexity */ 0, /* gds */ 1>; + let OtherPredicates = [HasGDS] in { + def : DSAtomicRetPat(frag#"_region_m0_"#vt.Size), + /* complexity */ 0, /* gds */ 1>; + } } multiclass DSAtomicRetNoRetPat_mc(frag#"_local_noret_"#vt.Size), /* complexity */ 1>; } - def : DSAtomicRetPat(frag#"_region_m0_"#vt.Size), - /* complexity */ 0, /* gds */ 1>; - def : DSAtomicRetPat(frag#"_region_m0_noret_"#vt.Size), - /* complexity */ 1, /* gds */ 1>; + let OtherPredicates = [HasGDS] in { + def : DSAtomicRetPat(frag#"_region_m0_"#vt.Size), + /* complexity */ 0, /* gds */ 1>; + def : DSAtomicRetPat(frag#"_region_m0_noret_"#vt.Size), + /* complexity */ 1, /* gds */ 1>; + } } @@ -1024,10 +1028,12 @@ /* complexity */ 1>; } - def : DSAtomicCmpXChgSwapped(frag#"_region_m0_"#vt.Size), - /* complexity */ 0, /* gds */ 1>; - def : DSAtomicCmpXChgSwapped(frag#"_region_m0_noret_"#vt.Size), - /* complexity */ 1, /* gds */ 1>; + let OtherPredicates = [HasGDS] in { + def : DSAtomicCmpXChgSwapped(frag#"_region_m0_"#vt.Size), + /* complexity */ 0, /* gds */ 1>; + def : DSAtomicCmpXChgSwapped(frag#"_region_m0_noret_"#vt.Size), + /* complexity */ 1, /* gds */ 1>; + } } } // End SubtargetPredicate = isGFX6GFX7GFX8GFX9GFX10 @@ -1047,10 +1053,12 @@ def : DSAtomicCmpXChg(!cast(noRetInst)#"_gfx9"), vt, !cast(frag#"_local_noret_"#vt.Size), /* complexity */ 1>; - def : DSAtomicCmpXChg(frag#"_region_m0_"#vt.Size), - /* complexity */ 0, /* gds */ 1>; - def : DSAtomicCmpXChg(frag#"_region_m0_noret_"#vt.Size), - /* complexity */ 1, /* gds */ 1>; + let OtherPredicates = [HasGDS] in { + def : DSAtomicCmpXChg(frag#"_region_m0_"#vt.Size), + /* complexity */ 0, /* gds */ 1>; + def : DSAtomicCmpXChg(frag#"_region_m0_noret_"#vt.Size), + /* complexity */ 1, /* gds */ 1>; + } } } // End SubtargetPredicate = isGFX11Plus diff --git a/llvm/lib/Target/AMDGPU/GCNProcessors.td b/llvm/lib/Target/AMDGPU/GCNProcessors.td --- a/llvm/lib/Target/AMDGPU/GCNProcessors.td +++ b/llvm/lib/Target/AMDGPU/GCNProcessors.td @@ -9,11 +9,11 @@ // The code produced for "generic" is only useful for tests and cannot // reasonably be expected to execute on any particular target. def : ProcessorModel<"generic", NoSchedModel, - [FeatureWavefrontSize64] + [FeatureWavefrontSize64, FeatureGDS] >; def : ProcessorModel<"generic-hsa", NoSchedModel, - [FeatureWavefrontSize64, FeatureFlatAddressSpace] + [FeatureWavefrontSize64, FeatureGDS, FeatureFlatAddressSpace] >; //===------------------------------------------------------------===// diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h --- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h +++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h @@ -180,6 +180,7 @@ bool HasArchitectedFlatScratch = false; bool EnableFlatScratch = false; bool HasArchitectedSGPRs = false; + bool HasGDS = false; bool AddNoCarryInsts = false; bool HasUnpackedD16VMem = false; bool LDSMisalignedBug = false; @@ -1155,6 +1156,9 @@ /// \returns true if the architected SGPRs are enabled. bool hasArchitectedSGPRs() const { return HasArchitectedSGPRs; } + /// \returns true if Global Data Share is supported. + bool hasGDS() const { return HasGDS; } + /// \returns true if the machine has merged shaders in which s0-s7 are /// reserved by the hardware and user SGPRs start at s8 bool hasMergedShaders() const {