[CUDA] Add conversion operators for threadIdx, blockIdx, gridDim, and blockDim to uint3 and dim3.

Justin Lebar · Justin Lebar · commit d7a35492ada2 · 2016-02-24T21:49:33.000Z
Summary: This lets you write, e.g. uint3 a = threadIdx; uint3 b = blockIdx; dim3 c = gridDim; dim3 d = blockDim; which is legal in nvcc, but was not legal in clang. The fact that e.g. the type of threadIdx is not actually uint3 is still observable, but now you have to try to observe it. Reviewers: tra Subscribers: echristo, cfe-commits Differential Revision: http://reviews.llvm.org/D17561 llvm-svn: 261777
diff --git a/clang/lib/Headers/__clang_cuda_runtime_wrapper.h b/clang/lib/Headers/__clang_cuda_runtime_wrapper.h
@@ -245,6 +245,33 @@ __device__ static inline void *malloc(size_t __size) {
 }
 } // namespace std
 
+// Out-of-line implementations from cuda_builtin_vars.h.  These need to come
+// after we've pulled in the definition of uint3 and dim3.
+
+__device__ inline __cuda_builtin_threadIdx_t::operator uint3() const {
+  uint3 ret;
+  ret.x = x;
+  ret.y = y;
+  ret.z = z;
+  return ret;
+}
+
+__device__ inline __cuda_builtin_blockIdx_t::operator uint3() const {
+  uint3 ret;
+  ret.x = x;
+  ret.y = y;
+  ret.z = z;
+  return ret;
+}
+
+__device__ inline __cuda_builtin_blockDim_t::operator dim3() const {
+  return dim3(x, y, z);
+}
+
+__device__ inline __cuda_builtin_gridDim_t::operator dim3() const {
+  return dim3(x, y, z);
+}
+
 #include <__clang_cuda_cmath.h>
 
 // curand_mtgp32_kernel helpfully redeclares blockDim and threadIdx in host
diff --git a/clang/lib/Headers/cuda_builtin_vars.h b/clang/lib/Headers/cuda_builtin_vars.h
@@ -24,10 +24,14 @@
 #ifndef __CUDA_BUILTIN_VARS_H
 #define __CUDA_BUILTIN_VARS_H
 
+// Forward declares from vector_types.h.
+struct uint3;
+struct dim3;
+
 // The file implements built-in CUDA variables using __declspec(property).
 // https://msdn.microsoft.com/en-us/library/yhfk0thd.aspx
 // All read accesses of built-in variable fields get converted into calls to a
-// getter function which in turn would call appropriate builtin to fetch the
+// getter function which in turn calls the appropriate builtin to fetch the
 // value.
 //
 // Example:
@@ -63,6 +67,9 @@ struct __cuda_builtin_threadIdx_t {
   __CUDA_DEVICE_BUILTIN(x,__builtin_ptx_read_tid_x());
   __CUDA_DEVICE_BUILTIN(y,__builtin_ptx_read_tid_y());
   __CUDA_DEVICE_BUILTIN(z,__builtin_ptx_read_tid_z());
+  // threadIdx should be convertible to uint3 (in fact in nvcc, it *is* a
+  // uint3).  This function is defined after we pull in vector_types.h.
+  __attribute__((device)) operator uint3() const;
 private:
   __CUDA_DISALLOW_BUILTINVAR_ACCESS(__cuda_builtin_threadIdx_t);
 };
@@ -71,6 +78,9 @@ struct __cuda_builtin_blockIdx_t {
   __CUDA_DEVICE_BUILTIN(x,__builtin_ptx_read_ctaid_x());
   __CUDA_DEVICE_BUILTIN(y,__builtin_ptx_read_ctaid_y());
   __CUDA_DEVICE_BUILTIN(z,__builtin_ptx_read_ctaid_z());
+  // blockIdx should be convertible to uint3 (in fact in nvcc, it *is* a
+  // uint3).  This function is defined after we pull in vector_types.h.
+  __attribute__((device)) operator uint3() const;
 private:
   __CUDA_DISALLOW_BUILTINVAR_ACCESS(__cuda_builtin_blockIdx_t);
 };
@@ -79,6 +89,9 @@ struct __cuda_builtin_blockDim_t {
   __CUDA_DEVICE_BUILTIN(x,__builtin_ptx_read_ntid_x());
   __CUDA_DEVICE_BUILTIN(y,__builtin_ptx_read_ntid_y());
   __CUDA_DEVICE_BUILTIN(z,__builtin_ptx_read_ntid_z());
+  // blockDim should be convertible to dim3 (in fact in nvcc, it *is* a
+  // dim3).  This function is defined after we pull in vector_types.h.
+  __attribute__((device)) operator dim3() const;
 private:
   __CUDA_DISALLOW_BUILTINVAR_ACCESS(__cuda_builtin_blockDim_t);
 };
@@ -87,6 +100,9 @@ struct __cuda_builtin_gridDim_t {
   __CUDA_DEVICE_BUILTIN(x,__builtin_ptx_read_nctaid_x());
   __CUDA_DEVICE_BUILTIN(y,__builtin_ptx_read_nctaid_y());
   __CUDA_DEVICE_BUILTIN(z,__builtin_ptx_read_nctaid_z());
+  // gridDim should be convertible to dim3 (in fact in nvcc, it *is* a
+  // dim3).  This function is defined after we pull in vector_types.h.
+  __attribute__((device)) operator dim3() const;
 private:
   __CUDA_DISALLOW_BUILTINVAR_ACCESS(__cuda_builtin_gridDim_t);
 };