Index: lib/Driver/Driver.cpp =================================================================== --- lib/Driver/Driver.cpp +++ lib/Driver/Driver.cpp @@ -2616,17 +2616,19 @@ C.MakeAction(CudaDeviceActions, types::TY_HIP_FATBIN); - DA.add(*CudaFatBinary, *ToolChains.front(), /*BoundArch=*/nullptr, - AssociatedOffloadKind); - // Clear the fat binary, it is already a dependence to an host - // action. - CudaFatBinary = nullptr; + if (!CompileDeviceOnly) { + DA.add(*CudaFatBinary, *ToolChains.front(), /*BoundArch=*/nullptr, + AssociatedOffloadKind); + // Clear the fat binary, it is already a dependence to an host + // action. + CudaFatBinary = nullptr; + } // Remove the CUDA actions as they are already connected to an host // action or fat binary. CudaDeviceActions.clear(); - return ABRT_Success; + return CompileDeviceOnly ? ABRT_Ignore_Host : ABRT_Success; } else if (CurPhase == phases::Link) { // Save CudaDeviceActions to DeviceLinkerInputs for each GPU subarch. // This happens to each device action originated from each input file. @@ -3014,8 +3016,10 @@ } // If we can use the bundler, replace the host action by the bundling one in - // the resulting list. Otherwise, just append the device actions. - if (CanUseBundler && !OffloadAL.empty()) { + // the resulting list. Otherwise, just append the device actions. For + // device only compilation, HostAction is a null pointer, therefore only do + // this when HostAction is not a null pointer. + if (CanUseBundler && HostAction && !OffloadAL.empty()) { // Add the host action to the list in order to create the bundling action. OffloadAL.push_back(HostAction); Index: test/Driver/cuda-phases.cu =================================================================== --- test/Driver/cuda-phases.cu +++ test/Driver/cuda-phases.cu @@ -157,6 +157,7 @@ // HBIN-DAG: [[P3:[0-9]+]]: backend, {[[P2]]}, assembler, (host-[[T]]) // HBIN-DAG: [[P4:[0-9]+]]: assembler, {[[P3]]}, object, (host-[[T]]) // HBIN-DAG: [[P5:[0-9]+]]: linker, {[[P4]]}, image, (host-[[T]]) +// HBIN-NOT: device // // Test single gpu architecture up to the assemble phase in host-only // compilation mode. @@ -172,6 +173,7 @@ // HASM-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (host-[[T]]) // HASM-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (host-[[T]]) // HASM-DAG: [[P3:[0-9]+]]: backend, {[[P2]]}, assembler, (host-[[T]]) +// HASM-NOT: device // // Test two gpu architectures with complete compilation in host-only @@ -190,6 +192,7 @@ // HBIN2-DAG: [[P3:[0-9]+]]: backend, {[[P2]]}, assembler, (host-[[T]]) // HBIN2-DAG: [[P4:[0-9]+]]: assembler, {[[P3]]}, object, (host-[[T]]) // HBIN2-DAG: [[P5:[0-9]+]]: linker, {[[P4]]}, image, (host-[[T]]) +// HBIN2-NOT: device // // Test two gpu architectures up to the assemble phase in host-only @@ -206,6 +209,7 @@ // HASM2-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (host-[[T]]) // HASM2-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (host-[[T]]) // HASM2-DAG: [[P3:[0-9]+]]: backend, {[[P2]]}, assembler, (host-[[T]]) +// HASM2-NOT: device // // Test single gpu architecture with complete compilation in device-only @@ -224,7 +228,7 @@ // DBIN_NV-DAG: [[P3:[0-9]+]]: backend, {[[P2]]}, assembler, (device-[[T]], [[ARCH]]) // DBIN_NV-DAG: [[P4:[0-9]+]]: assembler, {[[P3]]}, object, (device-[[T]], [[ARCH]]) // DBIN_NV-DAG: [[P5:[0-9]+]]: offload, "device-[[T]] (nvptx64-nvidia-cuda:[[ARCH]])" {[[P4]]}, object - +// DBIN-NOT: host // // Test single gpu architecture up to the assemble phase in device-only // compilation mode. @@ -241,6 +245,7 @@ // DASM-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (device-[[T]], [[ARCH]]) // DASM_NV-DAG: [[P3:[0-9]+]]: backend, {[[P2]]}, assembler, (device-[[T]], [[ARCH]]) // DASM_NV-DAG: [[P4:[0-9]+]]: offload, "device-[[T]] ([[TRIPLE:nvptx64-nvidia-cuda|amdgcn-amd-amdhsa]]:[[ARCH]])" {[[P3]]}, assembler +// DASM-NOT: host // // Test two gpu architectures with complete compilation in device-only @@ -265,7 +270,7 @@ // DBIN2_NV-DAG: [[P9:[0-9]+]]: backend, {[[P8]]}, assembler, (device-[[T]], [[ARCH2]]) // DBIN2_NV-DAG: [[P10:[0-9]+]]: assembler, {[[P9]]}, object, (device-[[T]], [[ARCH2]]) // DBIN2_NV-DAG: [[P11:[0-9]+]]: offload, "device-[[T]] ([[TRIPLE]]:[[ARCH2]])" {[[P10]]}, object - +// DBIN2-NOT: host // // Test two gpu architectures up to the assemble phase in device-only // compilation mode. @@ -288,3 +293,4 @@ // DASM2-DAG: [[P7:[0-9]+]]: compiler, {[[P6]]}, ir, (device-[[T]], [[ARCH2]]) // DASM2_NV-DAG: [[P8:[0-9]+]]: backend, {[[P7]]}, assembler, (device-[[T]], [[ARCH2]]) // DASM2_NV-DAG: [[P9:[0-9]+]]: offload, "device-[[T]] ([[TRIPLE]]:[[ARCH2]])" {[[P8]]}, assembler +// DASM2-NOT: host