diff --git a/libc/src/__support/GPU/nvptx/utils.h b/libc/src/__support/GPU/nvptx/utils.h
--- a/libc/src/__support/GPU/nvptx/utils.h
+++ b/libc/src/__support/GPU/nvptx/utils.h
@@ -100,7 +100,7 @@
 /// Returns the bit-mask of active threads in the current warp.
 [[clang::convergent]] LIBC_INLINE uint64_t get_lane_mask() {
   uint32_t mask;
-  asm volatile("activemask.b32 %0;" : "=r"(mask));
+  LIBC_INLINE_ASM("activemask.b32 %0;" : "=r"(mask));
   return mask;
 }
 
diff --git a/libc/src/__support/OSUtil/gpu/quick_exit.cpp b/libc/src/__support/OSUtil/gpu/quick_exit.cpp
--- a/libc/src/__support/OSUtil/gpu/quick_exit.cpp
+++ b/libc/src/__support/OSUtil/gpu/quick_exit.cpp
@@ -24,11 +24,11 @@
   port.close();
 
 #if defined(LIBC_TARGET_ARCH_IS_NVPTX)
-  asm("exit;" ::: "memory");
+  LIBC_INLINE_ASM("exit;" ::: "memory");
 #elif defined(LIBC_TARGET_ARCH_IS_AMDGPU)
   // This will terminate the entire wavefront, may not be valid with divergent
   // work items.
-  asm("s_endpgm" ::: "memory");
+  __builtin_amdgcn_endpgm();
 #endif
   __builtin_unreachable();
 }
diff --git a/libc/src/__support/RPC/rpc_util.h b/libc/src/__support/RPC/rpc_util.h
--- a/libc/src/__support/RPC/rpc_util.h
+++ b/libc/src/__support/RPC/rpc_util.h
@@ -23,7 +23,7 @@
 /// Suspend the thread briefly to assist the thread scheduler during busy loops.
 LIBC_INLINE void sleep_briefly() {
 #if defined(LIBC_TARGET_ARCH_IS_NVPTX) && __CUDA_ARCH__ >= 700
-  asm("nanosleep.u32 64;" ::: "memory");
+  LIBC_INLINE_ASM("nanosleep.u32 64;" ::: "memory");
 #elif defined(LIBC_TARGET_ARCH_IS_AMDGPU)
   __builtin_amdgcn_s_sleep(2);
 #else