diff --git a/clang/test/Driver/linker-wrapper.c b/clang/test/Driver/linker-wrapper.c --- a/clang/test/Driver/linker-wrapper.c +++ b/clang/test/Driver/linker-wrapper.c @@ -51,6 +51,16 @@ // AMDGPU-LINK: lld{{.*}}-flavor gnu --no-undefined -shared -plugin-opt=-amdgpu-internalize-symbols -plugin-opt=mcpu=gfx908 -o {{.*}}.out {{.*}}.o +// RUN: clang-offload-packager -o %t.out \ +// RUN: --image=file=%t.amdgpu.bc,kind=openmp,triple=amdgcn-amd-amdhsa,arch=gfx1030 \ +// RUN: --image=file=%t.amdgpu.bc,kind=openmp,triple=amdgcn-amd-amdhsa,arch=gfx1030 +// RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t.o -fembed-offload-object=%t.out +// RUN: clang-linker-wrapper --host-triple=x86_64-unknown-linux-gnu --dry-run --save-temps -O2 \ +// RUN: --linker-path=/usr/bin/ld -- %t.o -o a.out 2>&1 | FileCheck %s --check-prefix=AMDGPU-LTO-TEMPS + +// AMDGPU-LTO-TEMPS: clang" -o [[OBJ:.+]] -fPIC -c --target=amdgcn-amd-amdhsa -O2 -mcpu=gfx1030 {{.*}}.s +// AMDGPU-LTO-TEMPS: lld{{.*}}-flavor gnu --no-undefined -shared -plugin-opt=-amdgpu-internalize-symbols -plugin-opt=mcpu=gfx1030 -o {{.*}}.out {{.*}}.o + // RUN: clang-offload-packager -o %t.out \ // RUN: --image=file=%t.amdgpu.bc,kind=openmp,triple=amdgcn-amd-amdhsa,arch=gfx908 \ // RUN: --image=file=%t.amdgpu.bc,kind=openmp,triple=amdgcn-amd-amdhsa,arch=gfx908 diff --git a/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp b/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp --- a/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp +++ b/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp @@ -510,6 +510,42 @@ } } +Expected assemble(StringRef InputFile, const ArgList &Args) { + llvm::TimeTraceScope TimeScope("Clang Assembler"); + // Use `clang` to invoke the generic assembler. 
+ Expected ClangPath = + findProgram("clang", {getMainExecutable("clang")}); + if (!ClangPath) + return ClangPath.takeError(); + + const llvm::Triple Triple(Args.getLastArgValue(OPT_triple_EQ)); + StringRef Arch = Args.getLastArgValue(OPT_arch_EQ); + // Create a new file to write the assembled object to. Assume that the + // input filename already has the device and architecture. + auto TempFileOrErr = createOutputFile(sys::path::stem(InputFile), "o"); + if (!TempFileOrErr) + return TempFileOrErr.takeError(); + + StringRef OptLevel = Args.getLastArgValue(OPT_opt_level, "O2"); + SmallVector CmdArgs{ + *ClangPath, + "-o", + *TempFileOrErr, + "-fPIC", + "-c", + Args.MakeArgString("--target=" + Triple.getTriple()), + Args.MakeArgString("-" + OptLevel), + Triple.isAMDGPU() ? Args.MakeArgString("-mcpu=" + Arch) + : Args.MakeArgString("-march=" + Arch), + InputFile, + }; + + if (Error Err = executeCommands(*ClangPath, CmdArgs)) + return std::move(Err); + + return *TempFileOrErr; +} + Expected link(ArrayRef InputFiles, const ArgList &Args) { llvm::TimeTraceScope TimeScope("Generic linker"); const llvm::Triple Triple(Args.getLastArgValue(OPT_triple_EQ)); @@ -685,7 +721,8 @@ }; } Conf.PostOptModuleHook = Hook; - Conf.CGFileType = Triple.isNVPTX() ? CGFT_AssemblyFile : CGFT_ObjectFile; + Conf.CGFileType = + (Triple.isNVPTX() || SaveTemps) ? CGFT_AssemblyFile : CGFT_ObjectFile; // TODO: Handle remark files Conf.HasWholeProgramVisibility = Args.hasArg(OPT_whole_program); @@ -852,7 +889,7 @@ const Twine &ModuleName) -> std::unique_ptr { int FD = -1; auto &TempFile = Files[Task]; - StringRef Extension = (Triple.isNVPTX()) ? "s" : "o"; + StringRef Extension = (Triple.isNVPTX() || SaveTemps) ? "s" : "o"; std::string TaskStr = Task ? "." + std::to_string(Task) : ""; auto TempFileOrErr = createOutputFile(sys::path::filename(ExecutableName) + "-device-" + @@ -885,9 +922,12 @@ } - // Is we are compiling for NVPTX we need to run the assembler first. + // If we are compiling for NVPTX or saving temps we need to assemble first. 
- if (Triple.isNVPTX()) { + if (Triple.isNVPTX() || SaveTemps) { for (StringRef &File : Files) { - auto FileOrErr = nvptx::assemble(File, Args, !SingleOutput); + + auto FileOrErr = Triple.isNVPTX() + ? nvptx::assemble(File, Args, !SingleOutput) + : generic::assemble(File, Args); if (!FileOrErr) return FileOrErr.takeError(); File = *FileOrErr;