diff --git a/libc/src/__support/GPU/nvptx/utils.h b/libc/src/__support/GPU/nvptx/utils.h
--- a/libc/src/__support/GPU/nvptx/utils.h
+++ b/libc/src/__support/GPU/nvptx/utils.h
@@ -100,7 +100,7 @@
 /// Returns the bit-mask of active threads in the current warp.
 [[clang::convergent]] LIBC_INLINE uint64_t get_lane_mask() {
   uint32_t mask;
-  asm volatile("activemask.b32 %0;" : "=r"(mask));
+  LIBC_INLINE_ASM("activemask.b32 %0;" : "=r"(mask));
   return mask;
 }
 
diff --git a/libc/src/__support/OSUtil/gpu/quick_exit.cpp b/libc/src/__support/OSUtil/gpu/quick_exit.cpp
--- a/libc/src/__support/OSUtil/gpu/quick_exit.cpp
+++ b/libc/src/__support/OSUtil/gpu/quick_exit.cpp
@@ -24,11 +24,11 @@
   port.close();
 
 #if defined(LIBC_TARGET_ARCH_IS_NVPTX)
-  asm("exit;" ::: "memory");
+  LIBC_INLINE_ASM("exit;" ::: "memory");
 #elif defined(LIBC_TARGET_ARCH_IS_AMDGPU)
   // This will terminate the entire wavefront, may not be valid with divergent
   // work items.
-  asm("s_endpgm" ::: "memory");
+  __builtin_amdgcn_endpgm();
 #endif
   __builtin_unreachable();
 }
diff --git a/libc/src/__support/RPC/rpc_util.h b/libc/src/__support/RPC/rpc_util.h
--- a/libc/src/__support/RPC/rpc_util.h
+++ b/libc/src/__support/RPC/rpc_util.h
@@ -23,7 +23,7 @@
 /// Suspend the thread briefly to assist the thread scheduler during busy loops.
 LIBC_INLINE void sleep_briefly() {
 #if defined(LIBC_TARGET_ARCH_IS_NVPTX) && __CUDA_ARCH__ >= 700
-  asm("nanosleep.u32 64;" ::: "memory");
+  LIBC_INLINE_ASM("nanosleep.u32 64;" ::: "memory");
 #elif defined(LIBC_TARGET_ARCH_IS_AMDGPU)
   __builtin_amdgcn_s_sleep(2);
 #else