Skip to content

Commit 2bcc951

Browse files
committed Nov 14, 2018
[HIP] Fix device only compilation
Fix a bug causing host code to be compiled when --cuda-device-only is set. Differential Revision: https://reviews.llvm.org/D54496 llvm-svn: 346828
1 parent e913415 commit 2bcc951

File tree

2 files changed

+20
-10
lines changed

2 files changed

+20
-10
lines changed
 

‎clang/lib/Driver/Driver.cpp

Lines changed: 12 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -2616,17 +2616,19 @@ class OffloadingActionBuilder final {
26162616
C.MakeAction<LinkJobAction>(CudaDeviceActions,
26172617
types::TY_HIP_FATBIN);
26182618

2619-
DA.add(*CudaFatBinary, *ToolChains.front(), /*BoundArch=*/nullptr,
2620-
AssociatedOffloadKind);
2621-
// Clear the fat binary, it is already a dependence to an host
2622-
// action.
2623-
CudaFatBinary = nullptr;
2619+
if (!CompileDeviceOnly) {
2620+
DA.add(*CudaFatBinary, *ToolChains.front(), /*BoundArch=*/nullptr,
2621+
AssociatedOffloadKind);
2622+
// Clear the fat binary, it is already a dependence to an host
2623+
// action.
2624+
CudaFatBinary = nullptr;
2625+
}
26242626

26252627
// Remove the CUDA actions as they are already connected to an host
26262628
// action or fat binary.
26272629
CudaDeviceActions.clear();
26282630

2629-
return ABRT_Success;
2631+
return CompileDeviceOnly ? ABRT_Ignore_Host : ABRT_Success;
26302632
} else if (CurPhase == phases::Link) {
26312633
// Save CudaDeviceActions to DeviceLinkerInputs for each GPU subarch.
26322634
// This happens to each device action originated from each input file.
@@ -3014,8 +3016,10 @@ class OffloadingActionBuilder final {
30143016
}
30153017

30163018
// If we can use the bundler, replace the host action by the bundling one in
3017-
// the resulting list. Otherwise, just append the device actions.
3018-
if (CanUseBundler && !OffloadAL.empty()) {
3019+
// the resulting list. Otherwise, just append the device actions. For
3020+
// device only compilation, HostAction is a null pointer, therefore only do
3021+
// this when HostAction is not a null pointer.
3022+
if (CanUseBundler && HostAction && !OffloadAL.empty()) {
30193023
// Add the host action to the list in order to create the bundling action.
30203024
OffloadAL.push_back(HostAction);
30213025

‎clang/test/Driver/cuda-phases.cu

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -157,6 +157,7 @@
157157
// HBIN-DAG: [[P3:[0-9]+]]: backend, {[[P2]]}, assembler, (host-[[T]])
158158
// HBIN-DAG: [[P4:[0-9]+]]: assembler, {[[P3]]}, object, (host-[[T]])
159159
// HBIN-DAG: [[P5:[0-9]+]]: linker, {[[P4]]}, image, (host-[[T]])
160+
// HBIN-NOT: device
160161
//
161162
// Test single gpu architecture up to the assemble phase in host-only
162163
// compilation mode.
@@ -172,6 +173,7 @@
172173
// HASM-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (host-[[T]])
173174
// HASM-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (host-[[T]])
174175
// HASM-DAG: [[P3:[0-9]+]]: backend, {[[P2]]}, assembler, (host-[[T]])
176+
// HASM-NOT: device
175177

176178
//
177179
// Test two gpu architectures with complete compilation in host-only
@@ -190,6 +192,7 @@
190192
// HBIN2-DAG: [[P3:[0-9]+]]: backend, {[[P2]]}, assembler, (host-[[T]])
191193
// HBIN2-DAG: [[P4:[0-9]+]]: assembler, {[[P3]]}, object, (host-[[T]])
192194
// HBIN2-DAG: [[P5:[0-9]+]]: linker, {[[P4]]}, image, (host-[[T]])
195+
// HBIN2-NOT: device
193196

194197
//
195198
// Test two gpu architectures up to the assemble phase in host-only
@@ -206,6 +209,7 @@
206209
// HASM2-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (host-[[T]])
207210
// HASM2-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (host-[[T]])
208211
// HASM2-DAG: [[P3:[0-9]+]]: backend, {[[P2]]}, assembler, (host-[[T]])
212+
// HASM2-NOT: device
209213

210214
//
211215
// Test single gpu architecture with complete compilation in device-only
@@ -224,7 +228,7 @@
224228
// DBIN_NV-DAG: [[P3:[0-9]+]]: backend, {[[P2]]}, assembler, (device-[[T]], [[ARCH]])
225229
// DBIN_NV-DAG: [[P4:[0-9]+]]: assembler, {[[P3]]}, object, (device-[[T]], [[ARCH]])
226230
// DBIN_NV-DAG: [[P5:[0-9]+]]: offload, "device-[[T]] (nvptx64-nvidia-cuda:[[ARCH]])" {[[P4]]}, object
227-
231+
// DBIN-NOT: host
228232
//
229233
// Test single gpu architecture up to the assemble phase in device-only
230234
// compilation mode.
@@ -241,6 +245,7 @@
241245
// DASM-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (device-[[T]], [[ARCH]])
242246
// DASM_NV-DAG: [[P3:[0-9]+]]: backend, {[[P2]]}, assembler, (device-[[T]], [[ARCH]])
243247
// DASM_NV-DAG: [[P4:[0-9]+]]: offload, "device-[[T]] ([[TRIPLE:nvptx64-nvidia-cuda|amdgcn-amd-amdhsa]]:[[ARCH]])" {[[P3]]}, assembler
248+
// DASM-NOT: host
244249

245250
//
246251
// Test two gpu architectures with complete compilation in device-only
@@ -265,7 +270,7 @@
265270
// DBIN2_NV-DAG: [[P9:[0-9]+]]: backend, {[[P8]]}, assembler, (device-[[T]], [[ARCH2]])
266271
// DBIN2_NV-DAG: [[P10:[0-9]+]]: assembler, {[[P9]]}, object, (device-[[T]], [[ARCH2]])
267272
// DBIN2_NV-DAG: [[P11:[0-9]+]]: offload, "device-[[T]] ([[TRIPLE]]:[[ARCH2]])" {[[P10]]}, object
268-
273+
// DBIN2-NOT: host
269274
//
270275
// Test two gpu architectures up to the assemble phase in device-only
271276
// compilation mode.
@@ -288,3 +293,4 @@
288293
// DASM2-DAG: [[P7:[0-9]+]]: compiler, {[[P6]]}, ir, (device-[[T]], [[ARCH2]])
289294
// DASM2_NV-DAG: [[P8:[0-9]+]]: backend, {[[P7]]}, assembler, (device-[[T]], [[ARCH2]])
290295
// DASM2_NV-DAG: [[P9:[0-9]+]]: offload, "device-[[T]] ([[TRIPLE]]:[[ARCH2]])" {[[P8]]}, assembler
296+
// DASM2-NOT: host

0 commit comments

Comments
 (0)
Please sign in to comment.