diff --git a/libc/src/__support/GPU/amdgpu/utils.h b/libc/src/__support/GPU/amdgpu/utils.h
--- a/libc/src/__support/GPU/amdgpu/utils.h
+++ b/libc/src/__support/GPU/amdgpu/utils.h
@@ -144,6 +144,27 @@
   __builtin_amdgcn_wave_barrier();
 }
 
+/// Returns the current reading of the GPU's processor cycle counter.
+/// NOTE: On RDNA3 this was replaced by a cycle counter only 20 bits wide.
+LIBC_INLINE uint64_t clock() {
+  return __builtin_readcyclecounter();
+}
+
+/// Returns a timestamp that ticks at a fixed frequency. That frequency depends
+/// on the card and can only be queried through the driver.
+LIBC_INLINE uint64_t time() {
+#if !defined(__gfx1100__) && !defined(__gfx1101__) && \
+    !defined(__gfx1102__) && !defined(__gfx1103__)
+  // Any target not listed above reads the timestamp via s_memrealtime.
+  return __builtin_amdgcn_s_memrealtime();
+#else
+  // gfx1100 through gfx1103 use the returning form of s_sendmsg instead.
+  // NOTE(review): 0x83 looks like the "get realtime" message id; confirm it
+  // against the ISA documentation.
+  return __builtin_amdgcn_s_sendmsg_rtnl(0x83);
+#endif
+}
+
 } // namespace gpu
 } // namespace __llvm_libc
 
diff --git a/libc/src/__support/GPU/generic/utils.h b/libc/src/__support/GPU/generic/utils.h
--- a/libc/src/__support/GPU/generic/utils.h
+++ b/libc/src/__support/GPU/generic/utils.h
@@ -67,6 +67,16 @@
 
 LIBC_INLINE void sync_lane(uint64_t) {}
 
+/// Generic stand-in for the processor clock; always reports zero.
+LIBC_INLINE uint64_t clock() {
+  return 0;
+}
+
+/// Generic stand-in for the fixed-frequency timer; always reports zero.
+LIBC_INLINE uint64_t time() {
+  return 0;
+}
+
 } // namespace gpu
 } // namespace __llvm_libc
 
diff --git a/libc/src/__support/GPU/nvptx/utils.h b/libc/src/__support/GPU/nvptx/utils.h
--- a/libc/src/__support/GPU/nvptx/utils.h
+++ b/libc/src/__support/GPU/nvptx/utils.h
@@ -134,6 +134,22 @@
   __nvvm_bar_warp_sync(mask);
 }
 
+/// Reads the GPU's processor clock and returns its current value.
+LIBC_INLINE uint64_t clock() {
+  uint64_t cycles;
+  // Move the value of %clock64 into the 64-bit output operand.
+  asm volatile("mov.u64 %0, %%clock64;" : "=l"(cycles));
+  return cycles;
+}
+
+/// Reads the global fixed-frequency timer, which counts in nanoseconds.
+LIBC_INLINE uint64_t time() {
+  uint64_t now;
+  // Move the value of %globaltimer into the 64-bit output operand.
+  asm volatile("mov.u64 %0, %%globaltimer;" : "=l"(now));
+  return now;
+}
+
 } // namespace gpu
 } // namespace __llvm_libc
 