diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp --- a/clang/lib/Driver/Driver.cpp +++ b/clang/lib/Driver/Driver.cpp @@ -4459,17 +4459,6 @@ OffloadAction::DeviceDependences DDep; DDep.add(*A, *TCAndArch->first, TCAndArch->second.data(), Kind); A = C.MakeAction(HDep, DDep); - } else if (isa(A) && Kind == Action::OFK_Cuda) { - // The Cuda toolchain uses fatbinary as the linker phase to bundle the - // PTX and Cubin output. - ActionList FatbinActions; - for (Action *A : {A, A->getInputs()[0]}) { - OffloadAction::DeviceDependences DDep; - DDep.add(*A, *TCAndArch->first, TCAndArch->second.data(), Kind); - FatbinActions.emplace_back( - C.MakeAction(DDep, A->getType())); - } - A = C.MakeAction(FatbinActions, types::TY_CUDA_FATBIN); } ++TCAndArch; } diff --git a/clang/lib/Driver/ToolChains/Cuda.cpp b/clang/lib/Driver/ToolChains/Cuda.cpp --- a/clang/lib/Driver/ToolChains/Cuda.cpp +++ b/clang/lib/Driver/ToolChains/Cuda.cpp @@ -536,8 +536,9 @@ const char *Arch = (II.getType() == types::TY_PP_Asm) ? CudaArchToVirtualArchString(gpu_arch) : gpu_arch_str; - CmdArgs.push_back(Args.MakeArgString(llvm::Twine("--image=profile=") + - Arch + ",file=" + II.getFilename())); + CmdArgs.push_back( + Args.MakeArgString(llvm::Twine("--image=profile=") + Arch + + ",file=" + getToolChain().getInputFilename(II))); } for (const auto& A : Args.getAllArgValues(options::OPT_Xcuda_fatbinary)) @@ -695,9 +696,8 @@ std::string CudaToolChain::getInputFilename(const InputInfo &Input) const { // Only object files are changed, for example assembly files keep their .s - // extensions. CUDA also continues to use .o as they don't use nvlink but - // fatbinary. - if (!(OK == Action::OFK_OpenMP && Input.getType() == types::TY_Object)) + // extensions. + if (Input.getType() != types::TY_Object) return ToolChain::getInputFilename(Input); // Replace extension for object files with cubin because nvlink relies on diff --git a/clang/test/Driver/cuda-openmp-driver.cu b/clang/test/Driver/cuda-openmp-driver.cu --- a/clang/test/Driver/cuda-openmp-driver.cu +++ b/clang/test/Driver/cuda-openmp-driver.cu @@ -5,13 +5,11 @@ // RUN: --offload-new-driver --offload-arch=sm_35 --offload-arch=sm_70 %s 2>&1 \ // RUN: | FileCheck -check-prefix BINDINGS %s -// BINDINGS: "nvptx64-nvidia-cuda" - "clang", inputs: ["[[INPUT:.+]]"], output: "[[PTX_SM_35:.+]]" +// BINDINGS: "nvptx64-nvidia-cuda" - "clang", inputs: ["[[INPUT:.+]]"], output: "[[PTX_SM_35:.+]]" // BINDINGS-NEXT: "nvptx64-nvidia-cuda" - "NVPTX::Assembler", inputs: ["[[PTX_SM_35]]"], output: "[[CUBIN_SM_35:.+]]" -// BINDINGS-NEXT: "nvptx64-nvidia-cuda" - "NVPTX::Linker", inputs: ["[[CUBIN_SM_35]]", "[[PTX_SM_35]]"], output: "[[FATBIN_SM_35:.+]]" // BINDINGS-NEXT: "nvptx64-nvidia-cuda" - "clang", inputs: ["[[INPUT]]"], output: "[[PTX_SM_70:.+]]" // BINDINGS-NEXT: "nvptx64-nvidia-cuda" - "NVPTX::Assembler", inputs: ["[[PTX_SM_70:.+]]"], output: "[[CUBIN_SM_70:.+]]" -// BINDINGS-NEXT: "nvptx64-nvidia-cuda" - "NVPTX::Linker", inputs: ["[[CUBIN_SM_70]]", "[[PTX_SM_70:.+]]"], output: "[[FATBIN_SM_70:.+]]" -// BINDINGS-NEXT: "x86_64-unknown-linux-gnu" - "Offload::Packager", inputs: ["[[FATBIN_SM_35]]", "[[FATBIN_SM_70]]"], output: "[[BINARY:.+]]" +// BINDINGS-NEXT: "x86_64-unknown-linux-gnu" - "Offload::Packager", inputs: ["[[CUBIN_SM_35]]", "[[CUBIN_SM_70]]"], output: "[[BINARY:.+]]" // BINDINGS-NEXT: "x86_64-unknown-linux-gnu" - "clang", inputs: ["[[INPUT]]", "[[BINARY]]"], output: "[[HOST_OBJ:.+]]" // BINDINGS-NEXT: "x86_64-unknown-linux-gnu" - "Offload::Linker", inputs: ["[[HOST_OBJ]]"], output: "a.out" @@ -31,7 +29,6 @@ // BINDINGS-DEVICE: # "nvptx64-nvidia-cuda" - "clang", inputs: ["[[INPUT:.+]]"], output: "[[PTX:.+]]" // BINDINGS-DEVICE: # "nvptx64-nvidia-cuda" - "NVPTX::Assembler", inputs: ["[[PTX]]"], output: "[[CUBIN:.+]]" -// BINDINGS-DEVICE: # "nvptx64-nvidia-cuda" - "NVPTX::Linker", inputs: ["[[CUBIN]]", "[[PTX]]"], output: "{{.*}}.fatbin" // RUN: %clang -### -target x86_64-linux-gnu -nocudalib --cuda-feature=+ptx61 --offload-arch=sm_70 %s 2>&1 | FileCheck -check-prefix MANUAL-FEATURE %s // MANUAL-FEATURE: -cc1{{.*}}-target-feature{{.*}}+ptx61 diff --git a/clang/test/Driver/cuda-phases.cu b/clang/test/Driver/cuda-phases.cu --- a/clang/test/Driver/cuda-phases.cu +++ b/clang/test/Driver/cuda-phases.cu @@ -232,20 +232,14 @@ // NEW_DRIVER: 6: backend, {5}, assembler, (device-cuda, sm_52) // NEW_DRIVER: 7: assembler, {6}, object, (device-cuda, sm_52) // NEW_DRIVER: 8: offload, "device-cuda (nvptx64-nvidia-cuda:sm_52)" {7}, object -// NEW_DRIVER: 9: offload, "device-cuda (nvptx64-nvidia-cuda:sm_52)" {6}, assembler -// NEW_DRIVER: 10: linker, {8, 9}, cuda-fatbin, (device-cuda, sm_52) -// NEW_DRIVER: 11: offload, "device-cuda (nvptx64-nvidia-cuda:sm_52)" {10}, cuda-fatbin -// NEW_DRIVER: 12: input, "[[INPUT]]", cuda, (device-cuda, sm_70) -// NEW_DRIVER: 13: preprocessor, {12}, cuda-cpp-output, (device-cuda, sm_70) -// NEW_DRIVER: 14: compiler, {13}, ir, (device-cuda, sm_70) -// NEW_DRIVER: 15: backend, {14}, assembler, (device-cuda, sm_70) -// NEW_DRIVER: 16: assembler, {15}, object, (device-cuda, sm_70) -// NEW_DRIVER: 17: offload, "device-cuda (nvptx64-nvidia-cuda:sm_70)" {16}, object -// NEW_DRIVER: 18: offload, "device-cuda (nvptx64-nvidia-cuda:sm_70)" {15}, assembler -// NEW_DRIVER: 19: linker, {17, 18}, cuda-fatbin, (device-cuda, sm_70) -// NEW_DRIVER: 20: offload, "device-cuda (nvptx64-nvidia-cuda:sm_70)" {19}, cuda-fatbin -// NEW_DRIVER: 21: clang-offload-packager, {11, 20}, image -// NEW_DRIVER: 22: offload, " (powerpc64le-ibm-linux-gnu)" {2}, " (powerpc64le-ibm-linux-gnu)" {21}, ir -// NEW_DRIVER: 23: backend, {22}, assembler, (host-cuda) -// NEW_DRIVER: 24: assembler, {23}, object, (host-cuda) -// NEW_DRIVER: 25: clang-linker-wrapper, {24}, image, (host-cuda) +// NEW_DRIVER: 9: input, "[[INPUT]]", cuda, (device-cuda, sm_70) +// NEW_DRIVER: 10: preprocessor, {9}, cuda-cpp-output, (device-cuda, sm_70) +// NEW_DRIVER: 11: compiler, {10}, ir, (device-cuda, sm_70) +// NEW_DRIVER: 12: backend, {11}, assembler, (device-cuda, sm_70) +// NEW_DRIVER: 13: assembler, {12}, object, (device-cuda, sm_70) +// NEW_DRIVER: 14: offload, "device-cuda (nvptx64-nvidia-cuda:sm_70)" {13}, object +// NEW_DRIVER: 15: clang-offload-packager, {8, 14}, image +// NEW_DRIVER: 16: offload, " (powerpc64le-ibm-linux-gnu)" {2}, " (powerpc64le-ibm-linux-gnu)" {15}, ir +// NEW_DRIVER: 17: backend, {16}, assembler, (host-cuda) +// NEW_DRIVER: 18: assembler, {17}, object, (host-cuda) +// NEW_DRIVER: 19: clang-linker-wrapper, {18}, image, (host-cuda)