diff --git a/clang/test/Driver/linker-wrapper.c b/clang/test/Driver/linker-wrapper.c --- a/clang/test/Driver/linker-wrapper.c +++ b/clang/test/Driver/linker-wrapper.c @@ -81,6 +81,16 @@ // CUDA: nvlink{{.*}}-m64 -o {{.*}}.out -arch sm_70 {{.*}}.o {{.*}}.o // CUDA: fatbinary{{.*}}-64 --create {{.*}}.fatbin --image=profile=sm_52,file={{.*}}.out --image=profile=sm_70,file={{.*}}.out +// RUN: clang-offload-packager -o %t.out \ +// RUN: --image=file=%S/Inputs/dummy-bc.bc,kind=cuda,triple=nvptx64-nvidia-cuda,arch=sm_52 +// RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t.o \ +// RUN: -fembed-offload-object=%t.out +// RUN: clang-linker-wrapper --dry-run --host-triple x86_64-unknown-linux-gnu -linker-path \ +// RUN: /usr/bin/ld -- %t.o -o a.out 2>&1 | FileCheck %s --check-prefix=CUDA-LTO + +// CUDA-LTO: ptxas{{.*}}-m64 -o [[CUBIN:.+]] -O2 --gpu-name sm_52 [[PTX:.+]] +// CUDA-LTO: fatbinary{{.*}}-64 --create [[FATBINARY:.+]] --image=profile=compute_52,file=[[PTX]] --image=profile=sm_52,file=[[CUBIN]] + // RUN: clang-offload-packager -o %t.out \ // RUN: --image=file=%S/Inputs/dummy-elf.o,kind=openmp,triple=amdgcn-amd-amdhsa,arch=gfx908 \ // RUN: --image=file=%S/Inputs/dummy-elf.o,kind=openmp,triple=nvptx64-nvidia-cuda,arch=sm_70 diff --git a/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp b/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp --- a/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp +++ b/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp @@ -557,9 +557,16 @@ CmdArgs.push_back(TheTriple.isArch64Bit() ? "-64" : "-32"); CmdArgs.push_back("--create"); CmdArgs.push_back(TempFile); - for (const auto &FileAndArch : InputFiles) - CmdArgs.push_back(Saver.save("--image=profile=" + std::get<1>(FileAndArch) + - ",file=" + std::get<0>(FileAndArch))); + for (const auto &FileAndArch : InputFiles) { + if (std::get<0>(FileAndArch).endswith(".s")) + CmdArgs.push_back(Saver.save("--image=profile=compute_" + + std::get<1>(FileAndArch).split("_").second + + ",file=" + std::get<0>(FileAndArch))); + else + CmdArgs.push_back( + Saver.save("--image=profile=" + std::get<1>(FileAndArch) + + ",file=" + std::get<0>(FileAndArch))); + } if (Error Err = executeCommands(*FatBinaryPath, CmdArgs)) return std::move(Err); @@ -820,6 +827,7 @@ Error linkBitcodeFiles(SmallVectorImpl &InputFiles, SmallVectorImpl &OutputFiles, + SmallVectorImpl &AuxOutputFiles, const Triple &TheTriple, StringRef Arch) { SmallVector BitcodeInputFiles; DenseSet UsedInRegularObj; @@ -998,6 +1006,7 @@ // Is we are compiling for NVPTX we need to run the assembler first. if (TheTriple.isNVPTX()) { for (auto &File : Files) { + AuxOutputFiles.push_back(static_cast(File)); auto FileOrErr = nvptx::assemble(File, TheTriple, Arch, !WholeProgram); if (!FileOrErr) return FileOrErr.takeError(); @@ -1187,7 +1196,9 @@ // First link and remove all the input files containing bitcode. SmallVector InputFiles; - if (Error Err = linkBitcodeFiles(Input, InputFiles, Triple, Arch)) + SmallVector OutputFiles; + if (Error Err = + linkBitcodeFiles(Input, InputFiles, OutputFiles, Triple, Arch)) return Err; // Write any remaining device inputs to an output file for the linker job. @@ -1205,20 +1216,27 @@ : InputFiles.front(); if (!OutputOrErr) return OutputOrErr.takeError(); + OutputFiles.push_back(*OutputOrErr); - // Store the offloading image for each linked output file. + // Store the offloading image for each output file. for (OffloadKind Kind : ActiveOffloadKinds) { - llvm::ErrorOr> FileOrErr = - llvm::MemoryBuffer::getFileOrSTDIN(*OutputOrErr); - if (std::error_code EC = FileOrErr.getError()) - return createFileError(*OutputOrErr, EC); - - OffloadingImage TheImage{}; - TheImage.TheImageKind = IMG_Object; - TheImage.TheOffloadKind = Kind; - TheImage.StringData = {{"triple", TripleStr}, {"arch", Arch}}; - TheImage.Image = std::move(*FileOrErr); - Images[Kind].emplace_back(std::move(TheImage)); + for (StringRef Output : OutputFiles) { + // Ignore any PTX output if we're not creating a fatbinary. + if (Output.endswith(".s") && Kind != OFK_Cuda) + continue; + + llvm::ErrorOr> FileOrErr = + llvm::MemoryBuffer::getFileOrSTDIN(Output); + if (std::error_code EC = FileOrErr.getError()) + return createFileError(Output, EC); + + OffloadingImage TheImage{}; + TheImage.TheImageKind = Output.endswith(".s") ? IMG_PTX : IMG_Object; + TheImage.TheOffloadKind = Kind; + TheImage.StringData = {{"triple", TripleStr}, {"arch", Arch}}; + TheImage.Image = std::move(*FileOrErr); + Images[Kind].emplace_back(std::move(TheImage)); + } } }