Skip to content

Commit d7a3549

Browse files
author
Justin Lebar
committed Feb 24, 2016
[CUDA] Add conversion operators for threadIdx, blockIdx, gridDim, and blockDim to uint3 and dim3.
Summary: This lets you write, e.g.

    uint3 a = threadIdx;
    uint3 b = blockIdx;
    dim3 c = gridDim;
    dim3 d = blockDim;

which is legal in nvcc, but was not legal in clang.

The fact that, e.g., the type of threadIdx is not actually uint3 is still observable, but now you have to try to observe it.

Reviewers: tra
Subscribers: echristo, cfe-commits
Differential Revision: http://reviews.llvm.org/D17561
llvm-svn: 261777
1 parent c8dae53 commit d7a3549

File tree

2 files changed

+44
-1
lines changed

2 files changed

+44
-1
lines changed
 

‎clang/lib/Headers/__clang_cuda_runtime_wrapper.h

+27
Original file line number | Diff line number | Diff line change
@@ -245,6 +245,33 @@ __device__ static inline void *malloc(size_t __size) {
245245
}
246246
} // namespace std
247247

248+
// Out-of-line implementations from cuda_builtin_vars.h. These need to come
249+
// after we've pulled in the definition of uint3 and dim3.
250+
251+
// Out-of-line, device-side definition of the uint3 conversion operator of
// __cuda_builtin_threadIdx_t. Copies this object's x, y and z members into a
// local uint3, one field at a time, and returns that uint3 by value.
__device__ inline __cuda_builtin_threadIdx_t::operator uint3() const {
252+
uint3 ret;
253+
ret.x = x;
254+
ret.y = y;
255+
ret.z = z;
256+
return ret;
257+
}
258+
259+
// Out-of-line, device-side definition of the uint3 conversion operator of
// __cuda_builtin_blockIdx_t. Copies this object's x, y and z members into a
// local uint3, one field at a time, and returns that uint3 by value.
__device__ inline __cuda_builtin_blockIdx_t::operator uint3() const {
260+
uint3 ret;
261+
ret.x = x;
262+
ret.y = y;
263+
ret.z = z;
264+
return ret;
265+
}
266+
267+
// Out-of-line, device-side definition of the dim3 conversion operator of
// __cuda_builtin_blockDim_t. Returns a dim3 constructed from this object's
// x, y and z members.
__device__ inline __cuda_builtin_blockDim_t::operator dim3() const {
268+
return dim3(x, y, z);
269+
}
270+
271+
// Out-of-line, device-side definition of the dim3 conversion operator of
// __cuda_builtin_gridDim_t. Returns a dim3 constructed from this object's
// x, y and z members.
__device__ inline __cuda_builtin_gridDim_t::operator dim3() const {
272+
return dim3(x, y, z);
273+
}
274+
248275
#include <__clang_cuda_cmath.h>
249276

250277
// curand_mtgp32_kernel helpfully redeclares blockDim and threadIdx in host

‎clang/lib/Headers/cuda_builtin_vars.h

+17-1
Original file line number | Diff line number | Diff line change
@@ -24,10 +24,14 @@
2424
#ifndef __CUDA_BUILTIN_VARS_H
2525
#define __CUDA_BUILTIN_VARS_H
2626

27+
// Forward declares from vector_types.h.
28+
struct uint3;
29+
struct dim3;
30+
2731
// The file implements built-in CUDA variables using __declspec(property).
2832
// https://msdn.microsoft.com/en-us/library/yhfk0thd.aspx
2933
// All read accesses of built-in variable fields get converted into calls to a
30-
// getter function which in turn would call appropriate builtin to fetch the
34+
// getter function which in turn calls the appropriate builtin to fetch the
3135
// value.
3236
//
3337
// Example:
@@ -63,6 +67,9 @@ struct __cuda_builtin_threadIdx_t {
6367
__CUDA_DEVICE_BUILTIN(x,__builtin_ptx_read_tid_x());
6468
__CUDA_DEVICE_BUILTIN(y,__builtin_ptx_read_tid_y());
6569
__CUDA_DEVICE_BUILTIN(z,__builtin_ptx_read_tid_z());
70+
// threadIdx should be convertible to uint3 (in fact in nvcc, it *is* a
71+
// uint3). This function is defined after we pull in vector_types.h.
72+
__attribute__((device)) operator uint3() const;
6673
private:
6774
__CUDA_DISALLOW_BUILTINVAR_ACCESS(__cuda_builtin_threadIdx_t);
6875
};
@@ -71,6 +78,9 @@ struct __cuda_builtin_blockIdx_t {
7178
__CUDA_DEVICE_BUILTIN(x,__builtin_ptx_read_ctaid_x());
7279
__CUDA_DEVICE_BUILTIN(y,__builtin_ptx_read_ctaid_y());
7380
__CUDA_DEVICE_BUILTIN(z,__builtin_ptx_read_ctaid_z());
81+
// blockIdx should be convertible to uint3 (in fact in nvcc, it *is* a
82+
// uint3). This function is defined after we pull in vector_types.h.
83+
__attribute__((device)) operator uint3() const;
7484
private:
7585
__CUDA_DISALLOW_BUILTINVAR_ACCESS(__cuda_builtin_blockIdx_t);
7686
};
@@ -79,6 +89,9 @@ struct __cuda_builtin_blockDim_t {
7989
__CUDA_DEVICE_BUILTIN(x,__builtin_ptx_read_ntid_x());
8090
__CUDA_DEVICE_BUILTIN(y,__builtin_ptx_read_ntid_y());
8191
__CUDA_DEVICE_BUILTIN(z,__builtin_ptx_read_ntid_z());
92+
// blockDim should be convertible to dim3 (in fact in nvcc, it *is* a
93+
// dim3). This function is defined after we pull in vector_types.h.
94+
__attribute__((device)) operator dim3() const;
8295
private:
8396
__CUDA_DISALLOW_BUILTINVAR_ACCESS(__cuda_builtin_blockDim_t);
8497
};
@@ -87,6 +100,9 @@ struct __cuda_builtin_gridDim_t {
87100
__CUDA_DEVICE_BUILTIN(x,__builtin_ptx_read_nctaid_x());
88101
__CUDA_DEVICE_BUILTIN(y,__builtin_ptx_read_nctaid_y());
89102
__CUDA_DEVICE_BUILTIN(z,__builtin_ptx_read_nctaid_z());
103+
// gridDim should be convertible to dim3 (in fact in nvcc, it *is* a
104+
// dim3). This function is defined after we pull in vector_types.h.
105+
__attribute__((device)) operator dim3() const;
90106
private:
91107
__CUDA_DISALLOW_BUILTINVAR_ACCESS(__cuda_builtin_gridDim_t);
92108
};

0 commit comments

Comments
 (0)