diff --git a/libc/src/__support/GPU/amdgpu/utils.h b/libc/src/__support/GPU/amdgpu/utils.h --- a/libc/src/__support/GPU/amdgpu/utils.h +++ b/libc/src/__support/GPU/amdgpu/utils.h @@ -10,6 +10,7 @@ #define LLVM_LIBC_SRC_SUPPORT_GPU_AMDGPU_IO_H #include "src/__support/common.h" +#include "src/__support/macros/config.h" #include @@ -144,6 +145,30 @@ __builtin_amdgcn_wave_barrier(); } +/// Returns the current value of the GPU's processor clock. +/// NOTE: The RDNA3 and RDNA2 architectures use a 20-bit cycle counter. +LIBC_INLINE uint64_t processor_clock() { + if constexpr (LIBC_HAS_BUILTIN(__builtin_amdgcn_s_memtime)) + return __builtin_amdgcn_s_memtime(); + else if constexpr (LIBC_HAS_BUILTIN(__builtin_readcyclecounter)) + return __builtin_readcyclecounter(); + else + return 0; +} + +/// Returns a fixed-frequency timestamp. The actual frequency is dependent on +/// the card and can only be queried via the driver. +LIBC_INLINE uint64_t fixed_frequrency_clock() { + if constexpr (LIBC_HAS_BUILTIN(__builtin_amdgcn_s_sendmsg_rtnl)) + return __builtin_amdgcn_s_sendmsg_rtnl(0x83); + else if constexpr (LIBC_HAS_BUILTIN(__builtin_amdgcn_s_memrealtime)) + return __builtin_amdgcn_s_memrealtime(); + else if constexpr (LIBC_HAS_BUILTIN(__builtin_amdgcn_s_memtime)) + return __builtin_amdgcn_s_memtime(); + else + return 0; +} + } // namespace gpu } // namespace __llvm_libc diff --git a/libc/src/__support/GPU/generic/utils.h b/libc/src/__support/GPU/generic/utils.h --- a/libc/src/__support/GPU/generic/utils.h +++ b/libc/src/__support/GPU/generic/utils.h @@ -67,6 +67,10 @@ LIBC_INLINE void sync_lane(uint64_t) {} +LIBC_INLINE uint64_t processor_clock() { return 0; } + +LIBC_INLINE uint64_t fixed_frequrency_clock() { return 0; } + } // namespace gpu } // namespace __llvm_libc diff --git a/libc/src/__support/GPU/nvptx/utils.h b/libc/src/__support/GPU/nvptx/utils.h --- a/libc/src/__support/GPU/nvptx/utils.h +++ b/libc/src/__support/GPU/nvptx/utils.h @@ -134,6 +134,20 @@ 
__nvvm_bar_warp_sync(mask); } +/// Returns the current value of the GPU's processor clock. +LIBC_INLINE uint64_t processor_clock() { + uint64_t timestamp; + LIBC_INLINE_ASM("mov.u64 %0, %%clock64;" : "=l"(timestamp)); + return timestamp; +} + +/// Returns the value of a global fixed-frequency timer, in nanoseconds. +LIBC_INLINE uint64_t fixed_frequrency_clock() { + uint64_t nsecs; + LIBC_INLINE_ASM("mov.u64 %0, %%globaltimer;" : "=l"(nsecs)); + return nsecs; +} + } // namespace gpu } // namespace __llvm_libc