diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp --- a/clang/lib/Driver/Driver.cpp +++ b/clang/lib/Driver/Driver.cpp @@ -4712,10 +4712,13 @@ // we are also generating .o files. So we allow more than one output file in // this case as well. // + // OffloadClass of type TY_Nothing: device-only output will place many outputs + // into a single offloading action. We should count all inputs to the action + // as outputs. if (FinalOutput) { unsigned NumOutputs = 0; unsigned NumIfsOutputs = 0; - for (const Action *A : C.getActions()) + for (const Action *A : C.getActions()) { if (A->getType() != types::TY_Nothing && !(A->getKind() == Action::IfsMergeJobClass || (A->getType() == clang::driver::types::TY_IFS_CPP && @@ -4724,6 +4727,10 @@ (A->getKind() == Action::BindArchClass && A->getInputs().size() && A->getInputs().front()->getKind() == Action::IfsMergeJobClass))) ++NumOutputs; + else if (A->getKind() == Action::OffloadClass && + A->getType() == types::TY_Nothing) + NumOutputs += A->size(); + } if (NumOutputs > 1) { Diag(clang::diag::err_drv_output_argument_with_multiple_files); @@ -5261,20 +5268,21 @@ // \ // Device Action 1 ---> OffloadAction -> Device Action 2 // - // For a) and b), we just return the job generated for the dependence. For + // For a) and b), we just return the job generated for the dependences. For // c) and d) we override the current action with the host/device dependence // if the current toolchain is host/device and set the offload dependences // info with the jobs obtained from the device/host dependence(s). - // If there is a single device option, just generate the job for it. - if (OA->hasSingleDeviceDependence()) { + // If there is a single device option or has no host action, just generate + // the job for it. + if (OA->hasSingleDeviceDependence() || !OA->hasHostDependence()) { InputInfoList DevA; OA->doOnEachDeviceDependence([&](Action *DepA, const ToolChain *DepTC, const char *DepBoundArch) { - DevA = - BuildJobsForAction(C, DepA, DepTC, DepBoundArch, AtTopLevel, - /*MultipleArchs*/ !!DepBoundArch, LinkingOutput, - CachedResults, DepA->getOffloadingDeviceKind()); + DevA.append(BuildJobsForAction(C, DepA, DepTC, DepBoundArch, AtTopLevel, + /*MultipleArchs*/ !!DepBoundArch, + LinkingOutput, CachedResults, + DepA->getOffloadingDeviceKind())); }); return DevA; } diff --git a/clang/test/Driver/cuda-bindings.cu b/clang/test/Driver/cuda-bindings.cu --- a/clang/test/Driver/cuda-bindings.cu +++ b/clang/test/Driver/cuda-bindings.cu @@ -146,3 +146,20 @@ // RUN: --cuda-gpu-arch=sm_52 --cuda-device-only -c -o foo.o %s 2>&1 \ // RUN: | FileCheck -check-prefix=D_ONLY %s // D_ONLY: "foo.o" + +// +// Check to make sure we can generate multiple outputs for device-only +// compilation and fail with '-o'. +// +// RUN: %clang -### -target powerpc64le-ibm-linux-gnu --offload-new-driver -ccc-print-bindings \ +// RUN: --offload-arch=sm_70 --offload-arch=sm_52 --offload-device-only -c %s 2>&1 \ +// RUN: | FileCheck -check-prefix=MULTI-D-ONLY %s +// MULTI-D-ONLY: # "nvptx64-nvidia-cuda" - "clang", inputs: ["[[INPUT:.+]]"], output: "[[PTX_70:.+]]" +// MULTI-D-ONLY-NEXT: # "nvptx64-nvidia-cuda" - "NVPTX::Assembler", inputs: ["[[PTX_70]]"], output: "[[CUBIN_70:.+]]" +// MULTI-D-ONLY-NEXT: # "nvptx64-nvidia-cuda" - "clang", inputs: ["[[INPUT]]"], output: "[[PTX_52:.+]]" +// MULTI-D-ONLY-NEXT: # "nvptx64-nvidia-cuda" - "NVPTX::Assembler", inputs: ["[[PTX_52]]"], output: "[[CUBIN_52:.+]]" +// +// RUN: %clang -### -target powerpc64le-ibm-linux-gnu --offload-new-driver -ccc-print-bindings \ +// RUN: --offload-arch=sm_70 --offload-arch=sm_52 --offload-device-only -c -o %t %s 2>&1 \ +// RUN: | FileCheck -check-prefix=MULTI-D-ONLY-O %s +// MULTI-D-ONLY-O: error: cannot specify -o when generating multiple output files