Skip to content

Commit 720f8da

Browse files
author
Justin Lebar
committed May 30, 2016
[CUDA] Fix order of vectorized ldg intrinsics' elements.
Summary: The order is [x, y, z, w], not [w, x, y, z].
Subscribers: cfe-commits, tra
Differential Revision: http://reviews.llvm.org/D20794
llvm-svn: 271215
1 parent 09175da commit 720f8da

File tree

1 file changed

+28
-28
lines changed

1 file changed

+28
-28
lines changed
 

‎clang/lib/Headers/__clang_cuda_intrinsics.h

+28 -28
Original file line number | Diff line number | Diff line change
@@ -74,10 +74,10 @@ inline __device__ char4 __ldg(const char4 *ptr) {
7474
typedef char c4 __attribute__((ext_vector_type(4)));
7575
c4 rv = __nvvm_ldg_c4(reinterpret_cast<const c4 *>(ptr));
7676
char4 ret;
77-
ret.w = rv[0];
78-
ret.x = rv[1];
79-
ret.y = rv[2];
80-
ret.z = rv[3];
77+
ret.x = rv[0];
78+
ret.y = rv[1];
79+
ret.z = rv[2];
80+
ret.w = rv[3];
8181
return ret;
8282
}
8383
inline __device__ short2 __ldg(const short2 *ptr) {
@@ -92,10 +92,10 @@ inline __device__ short4 __ldg(const short4 *ptr) {
9292
typedef short s4 __attribute__((ext_vector_type(4)));
9393
s4 rv = __nvvm_ldg_s4(reinterpret_cast<const s4 *>(ptr));
9494
short4 ret;
95-
ret.w = rv[0];
96-
ret.x = rv[1];
97-
ret.y = rv[2];
98-
ret.z = rv[3];
95+
ret.x = rv[0];
96+
ret.y = rv[1];
97+
ret.z = rv[2];
98+
ret.w = rv[3];
9999
return ret;
100100
}
101101
inline __device__ int2 __ldg(const int2 *ptr) {
@@ -110,10 +110,10 @@ inline __device__ int4 __ldg(const int4 *ptr) {
110110
typedef int i4 __attribute__((ext_vector_type(4)));
111111
i4 rv = __nvvm_ldg_i4(reinterpret_cast<const i4 *>(ptr));
112112
int4 ret;
113-
ret.w = rv[0];
114-
ret.x = rv[1];
115-
ret.y = rv[2];
116-
ret.z = rv[3];
113+
ret.x = rv[0];
114+
ret.y = rv[1];
115+
ret.z = rv[2];
116+
ret.w = rv[3];
117117
return ret;
118118
}
119119
inline __device__ longlong2 __ldg(const longlong2 *ptr) {
@@ -137,10 +137,10 @@ inline __device__ uchar4 __ldg(const uchar4 *ptr) {
137137
typedef unsigned char uc4 __attribute__((ext_vector_type(4)));
138138
uc4 rv = __nvvm_ldg_uc4(reinterpret_cast<const uc4 *>(ptr));
139139
uchar4 ret;
140-
ret.w = rv[0];
141-
ret.x = rv[1];
142-
ret.y = rv[2];
143-
ret.z = rv[3];
140+
ret.x = rv[0];
141+
ret.y = rv[1];
142+
ret.z = rv[2];
143+
ret.w = rv[3];
144144
return ret;
145145
}
146146
inline __device__ ushort2 __ldg(const ushort2 *ptr) {
@@ -155,10 +155,10 @@ inline __device__ ushort4 __ldg(const ushort4 *ptr) {
155155
typedef unsigned short us4 __attribute__((ext_vector_type(4)));
156156
us4 rv = __nvvm_ldg_us4(reinterpret_cast<const us4 *>(ptr));
157157
ushort4 ret;
158-
ret.w = rv[0];
159-
ret.x = rv[1];
160-
ret.y = rv[2];
161-
ret.z = rv[3];
158+
ret.x = rv[0];
159+
ret.y = rv[1];
160+
ret.z = rv[2];
161+
ret.w = rv[3];
162162
return ret;
163163
}
164164
inline __device__ uint2 __ldg(const uint2 *ptr) {
@@ -173,10 +173,10 @@ inline __device__ uint4 __ldg(const uint4 *ptr) {
173173
typedef unsigned int ui4 __attribute__((ext_vector_type(4)));
174174
ui4 rv = __nvvm_ldg_ui4(reinterpret_cast<const ui4 *>(ptr));
175175
uint4 ret;
176-
ret.w = rv[0];
177-
ret.x = rv[1];
178-
ret.y = rv[2];
179-
ret.z = rv[3];
176+
ret.x = rv[0];
177+
ret.y = rv[1];
178+
ret.z = rv[2];
179+
ret.w = rv[3];
180180
return ret;
181181
}
182182
inline __device__ ulonglong2 __ldg(const ulonglong2 *ptr) {
@@ -200,10 +200,10 @@ inline __device__ float4 __ldg(const float4 *ptr) {
200200
typedef float f4 __attribute__((ext_vector_type(4)));
201201
f4 rv = __nvvm_ldg_f4(reinterpret_cast<const f4 *>(ptr));
202202
float4 ret;
203-
ret.w = rv[0];
204-
ret.x = rv[1];
205-
ret.y = rv[2];
206-
ret.z = rv[3];
203+
ret.x = rv[0];
204+
ret.y = rv[1];
205+
ret.z = rv[2];
206+
ret.w = rv[3];
207207
return ret;
208208
}
209209
inline __device__ double2 __ldg(const double2 *ptr) {

0 commit comments

Comments
 (0)
Please sign in to comment.