diff --git a/clang/lib/Basic/Targets/AMDGPU.cpp b/clang/lib/Basic/Targets/AMDGPU.cpp --- a/clang/lib/Basic/Targets/AMDGPU.cpp +++ b/clang/lib/Basic/Targets/AMDGPU.cpp @@ -192,6 +192,7 @@ Features["gfx10-insts"] = true; Features["gfx10-3-insts"] = true; Features["s-memrealtime"] = true; + Features["s-memtime-inst"] = true; break; case GK_GFX1012: case GK_GFX1011: diff --git a/clang/test/CodeGenOpenCL/amdgpu-features.cl b/clang/test/CodeGenOpenCL/amdgpu-features.cl --- a/clang/test/CodeGenOpenCL/amdgpu-features.cl +++ b/clang/test/CodeGenOpenCL/amdgpu-features.cl @@ -58,9 +58,9 @@ // GFX1010: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dpp,+flat-address-space,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst" // GFX1011: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dpp,+flat-address-space,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst" // GFX1012: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dpp,+flat-address-space,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst" -// GFX1030: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dpp,+flat-address-space,+gfx10-3-insts,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime" -// GFX1031: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dpp,+flat-address-space,+gfx10-3-insts,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime" -// GFX1032: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dpp,+flat-address-space,+gfx10-3-insts,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime" -// GFX1033: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dpp,+flat-address-space,+gfx10-3-insts,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime" +// GFX1030: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dpp,+flat-address-space,+gfx10-3-insts,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst" +// GFX1031: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dpp,+flat-address-space,+gfx10-3-insts,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst" +// GFX1032: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dpp,+flat-address-space,+gfx10-3-insts,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst" +// GFX1033: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dpp,+flat-address-space,+gfx10-3-insts,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst" kernel void test() {} diff --git a/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx1030.cl b/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx1030.cl deleted file mode 100644 --- a/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx1030.cl +++ /dev/null @@ -1,7 +0,0 @@ -// REQUIRES: amdgpu-registered-target -// RUN: %clang_cc1 -triple amdgcn-- -target-cpu gfx1030 -verify -S -o - %s - -void test_gfx1030_s_memtime() -{ - __builtin_amdgcn_s_memtime(); // expected-error {{'__builtin_amdgcn_s_memtime' needs target feature s-memtime-inst}} -} diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td --- a/llvm/lib/Target/AMDGPU/AMDGPU.td +++ b/llvm/lib/Target/AMDGPU/AMDGPU.td @@ -563,6 +563,12 @@ "Has s_memtime instruction" >; +def FeatureShaderCyclesRegister : SubtargetFeature<"shader-cycles-register", + "HasShaderCyclesRegister", + "true", + "Has SHADER_CYCLES hardware register" +>; + def FeatureMadMacF32Insts : SubtargetFeature<"mad-mac-f32-insts", "HasMadMacF32Insts", "true", @@ -777,7 +783,7 @@ FeatureNoSdstCMPX, FeatureVscnt, FeatureRegisterBanking, FeatureVOP3Literal, FeatureDPP8, FeatureExtendedImageInsts, FeatureNoDataDepHazard, FeaturePkFmacF16Inst, - FeatureGFX10A16, FeatureFastDenormalF32, FeatureG16, + FeatureGFX10A16, FeatureSMemTimeInst, FeatureFastDenormalF32, FeatureG16, FeatureUnalignedBufferAccess, FeatureUnalignedDSAccess ] >; @@ -988,7 +994,6 @@ FeatureScalarAtomics, FeatureScalarFlatScratchInsts, FeatureGetWaveIdInst, - FeatureSMemTimeInst, FeatureMadMacF32Insts, FeatureDsSrc2Insts, FeatureLdsMisalignedBug, @@ -1009,7 +1014,6 @@ FeatureScalarAtomics, FeatureScalarFlatScratchInsts, FeatureGetWaveIdInst, - FeatureSMemTimeInst, FeatureMadMacF32Insts, FeatureDsSrc2Insts, FeatureLdsMisalignedBug, @@ -1030,7 +1034,6 @@ FeatureScalarAtomics, FeatureScalarFlatScratchInsts, FeatureGetWaveIdInst, - FeatureSMemTimeInst, FeatureMadMacF32Insts, FeatureDsSrc2Insts, FeatureLdsMisalignedBug, @@ -1047,7 +1050,8 @@ FeatureDot5Insts, FeatureDot6Insts, FeatureNSAEncoding, - FeatureWavefrontSize32]>; + FeatureWavefrontSize32, + FeatureShaderCyclesRegister]>; //===----------------------------------------------------------------------===// @@ -1377,7 +1381,8 @@ def HasSMemTimeInst : Predicate<"Subtarget->hasSMemTimeInst()">, AssemblerPredicate<(all_of FeatureSMemTimeInst)>; -def HasNoSMemTimeInst : Predicate<"!Subtarget->hasSMemTimeInst()">; +def HasShaderCyclesRegister : Predicate<"Subtarget->hasShaderCyclesRegister()">, + AssemblerPredicate<(all_of FeatureShaderCyclesRegister)>; def HasPkFmacF16Inst : Predicate<"Subtarget->hasPkFmacF16Inst()">, AssemblerPredicate<(all_of FeaturePkFmacF16Inst)>; diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h --- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h +++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h @@ -163,6 +163,7 @@ bool HasVscnt; bool HasGetWaveIdInst; bool HasSMemTimeInst; + bool HasShaderCyclesRegister; bool HasRegisterBanking; bool HasVOP3Literal; bool HasNoDataDepHazard; @@ -714,6 +715,10 @@ return HasSMemTimeInst; } + bool hasShaderCyclesRegister() const { + return HasShaderCyclesRegister; + } + bool hasRegisterBanking() const { return HasRegisterBanking; } diff --git a/llvm/lib/Target/AMDGPU/SMInstructions.td b/llvm/lib/Target/AMDGPU/SMInstructions.td --- a/llvm/lib/Target/AMDGPU/SMInstructions.td +++ b/llvm/lib/Target/AMDGPU/SMInstructions.td @@ -866,14 +866,16 @@ >; } // let OtherPredicates = [HasSMemTimeInst] -let OtherPredicates = [HasNoSMemTimeInst] in { +let OtherPredicates = [HasShaderCyclesRegister] in { def : GCNPat < (i64 (readcyclecounter)), (REG_SEQUENCE SReg_64, (S_GETREG_B32 getHwRegImm.ret), sub0, - (S_MOV_B32 (i32 0)), sub1) ->; -} // let OtherPredicates = [HasNoSMemTimeInst] + (S_MOV_B32 (i32 0)), sub1)> { + // Prefer this to s_memtime because it has lower and more predictable latency. + let AddedComplexity = 1; +} +} // let OtherPredicates = [HasShaderCyclesRegister] //===----------------------------------------------------------------------===// // GFX10. diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.memtime.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.memtime.ll --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.memtime.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.memtime.ll @@ -1,7 +1,7 @@ ; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck --check-prefixes=SIVI,GCN %s ; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck --check-prefixes=SIVI,GCN %s ; RUN: llc -march=amdgcn -mcpu=gfx1010 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s -; RUN: not --crash llc -march=amdgcn -mcpu=gfx1030 -mattr=-flat-for-global -verify-machineinstrs < %s 2>&1 | FileCheck -check-prefix=GFX1030-ERR %s +; RUN: llc -march=amdgcn -mcpu=gfx1030 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s declare i64 @llvm.amdgcn.s.memtime() #0 @@ -13,7 +13,6 @@ ; SIVI-NOT: lgkmcnt ; GCN: s_memtime s{{\[[0-9]+:[0-9]+\]}} ; GCN: {{buffer|global}}_store_dwordx2 -; GFX1030-ERR: ERROR define amdgpu_kernel void @test_s_memtime(i64 addrspace(1)* %out) #0 { %cycle0 = call i64 @llvm.amdgcn.s.memtime() store volatile i64 %cycle0, i64 addrspace(1)* %out diff --git a/llvm/test/MC/AMDGPU/gfx1030_err.s b/llvm/test/MC/AMDGPU/gfx1030_err.s --- a/llvm/test/MC/AMDGPU/gfx1030_err.s +++ b/llvm/test/MC/AMDGPU/gfx1030_err.s @@ -21,9 +21,6 @@ s_get_waveid_in_workgroup s0 // GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU -s_memtime s[0:1] -// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU - s_getreg_b32 s2, hwreg(HW_REG_XNACK_MASK) // GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: specified hardware register is not supported on this GPU