diff --git a/clang/test/Driver/linker-wrapper.c b/clang/test/Driver/linker-wrapper.c --- a/clang/test/Driver/linker-wrapper.c +++ b/clang/test/Driver/linker-wrapper.c @@ -12,17 +12,11 @@ // RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t.o -fembed-offload-object=%t.out // RUN: clang-linker-wrapper --host-triple=x86_64-unknown-linux-gnu --dry-run \ // RUN: --linker-path=/usr/bin/ld -- %t.o -o a.out 2>&1 | FileCheck %s --check-prefix=NVPTX-LINK - -// NVPTX-LINK: nvlink{{.*}}-m64 -o {{.*}}.img -arch sm_70 {{.*}}.o {{.*}}.o - -// RUN: clang-offload-packager -o %t.out \ -// RUN: --image=file=%t.elf.o,kind=openmp,triple=nvptx64-nvidia-cuda,arch=sm_70 \ -// RUN: --image=file=%t.elf.o,kind=openmp,triple=nvptx64-nvidia-cuda,arch=sm_70 // RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-llvm-bc -o %t.o -fembed-offload-object=%t.out // RUN: clang-linker-wrapper --host-triple=x86_64-unknown-linux-gnu --dry-run \ -// RUN: --linker-path=/usr/bin/ld -- %t.o -o a.out 2>&1 | FileCheck %s --check-prefix=HOST-BC +// RUN: --linker-path=/usr/bin/ld -- %t.o -o a.out 2>&1 | FileCheck %s --check-prefix=NVPTX-LINK -// HOST-BC: nvlink{{.*}}-m64 -o {{.*}}.img -arch sm_70 {{.*}}.o {{.*}}.o +// NVPTX-LINK: clang{{.*}} -o {{.*}}.img --target=nvptx64-nvidia-cuda -march=sm_70 -O2 -Wl,--no-undefined {{.*}}.o {{.*}}.o // RUN: clang-offload-packager -o %t.out \ // RUN: --image=file=%t.elf.o,kind=openmp,triple=nvptx64-nvidia-cuda,arch=sm_70 \ @@ -31,16 +25,7 @@ // RUN: clang-linker-wrapper --host-triple=x86_64-unknown-linux-gnu --dry-run --device-debug -O0 \ // RUN: --linker-path=/usr/bin/ld -- %t.o -o a.out 2>&1 | FileCheck %s --check-prefix=NVPTX-LINK-DEBUG -// NVPTX-LINK-DEBUG: nvlink{{.*}}-m64 -g -o {{.*}}.img -arch sm_70 {{.*}}.o {{.*}}.o - -// RUN: clang-offload-packager -o %t.out \ -// RUN: --image=file=%t.nvptx.bc,kind=openmp,triple=nvptx64-nvidia-cuda,arch=sm_70 \ -// RUN: --image=file=%t.nvptx.bc,kind=openmp,triple=nvptx64-nvidia-cuda,arch=sm_70 -// RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t.o -fembed-offload-object=%t.out -// RUN: clang-linker-wrapper --host-triple=x86_64-unknown-linux-gnu --dry-run --device-debug -O2 \ -// RUN: --linker-path=/usr/bin/ld -- %t.o -o a.out 2>&1 | FileCheck %s --check-prefix=NVPTX-LINK-DEBUG-LTO - -// NVPTX-LINK-DEBUG-LTO: ptxas{{.*}}-m64 -o {{.*}}.cubin -O2 --gpu-name sm_70 -lineinfo {{.*}}.s +// NVPTX-LINK-DEBUG: clang{{.*}} -o {{.*}}.img --target=nvptx64-nvidia-cuda -march=sm_70 -O2 -Wl,--no-undefined -g {{.*}}.o {{.*}}.o // RUN: clang-offload-packager -o %t.out \ // RUN: --image=file=%t.elf.o,kind=openmp,triple=amdgcn-amd-amdhsa,arch=gfx908 \ @@ -49,7 +34,7 @@ // RUN: clang-linker-wrapper --host-triple=x86_64-unknown-linux-gnu --dry-run \ // RUN: --linker-path=/usr/bin/ld -- %t.o -o a.out 2>&1 | FileCheck %s --check-prefix=AMDGPU-LINK -// AMDGPU-LINK: lld{{.*}}-flavor gnu --no-undefined -shared -plugin-opt=-amdgpu-internalize-symbols -plugin-opt=mcpu=gfx908 -o {{.*}}.img {{.*}}.o {{.*}}.o +// AMDGPU-LINK: clang{{.*}} -o {{.*}}.img --target=amdgcn-amd-amdhsa -mcpu=gfx908 -O2 -Wl,--no-undefined {{.*}}.o {{.*}}.o // RUN: clang-offload-packager -o %t.out \ // RUN: --image=file=%t.amdgpu.bc,kind=openmp,triple=amdgcn-amd-amdhsa,arch=gfx1030 \ @@ -58,17 +43,7 @@ // RUN: clang-linker-wrapper --host-triple=x86_64-unknown-linux-gnu --dry-run --save-temps -O2 \ // RUN: --linker-path=/usr/bin/ld -- %t.o -o a.out 2>&1 | FileCheck %s --check-prefix=AMDGPU-LTO-TEMPS -// AMDGPU-LTO-TEMPS: clang{{.*}}-o [[OBJ:.+]] -fPIC -c --target=amdgcn-amd-amdhsa -O2 -mcpu=gfx1030 {{.*}}.s -// AMDGPU-LTO-TEMPS: lld{{.*}}-flavor gnu --no-undefined -shared -plugin-opt=-amdgpu-internalize-symbols -plugin-opt=mcpu=gfx1030 -o {{.*}}.img {{.*}}.o - -// RUN: clang-offload-packager -o %t.out \ -// RUN: --image=file=%t.amdgpu.bc,kind=openmp,triple=amdgcn-amd-amdhsa,arch=gfx908 \ -// RUN: --image=file=%t.amdgpu.bc,kind=openmp,triple=amdgcn-amd-amdhsa,arch=gfx908 -// RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t.o -fembed-offload-object=%t.out -// RUN: clang-linker-wrapper --host-triple=x86_64-unknown-linux-gnu --dry-run \ -// RUN: --linker-path=/usr/bin/ld -- %t.o -o a.out 2>&1 | FileCheck %s --check-prefix=AMDGPU-LINK-LTO - -// AMDGPU-LINK-LTO: lld{{.*}}-flavor gnu --no-undefined -shared -plugin-opt=-amdgpu-internalize-symbols -plugin-opt=mcpu=gfx908 -o {{.*}}.img {{.*}}.o +// AMDGPU-LTO-TEMPS: clang{{.*}} -o {{.*}}.img --target=amdgcn-amd-amdhsa -mcpu=gfx1030 -O2 -Wl,--no-undefined -save-temps {{.*}}.s // RUN: clang-offload-packager -o %t.out \ // RUN: --image=file=%t.elf.o,kind=openmp,triple=x86_64-unknown-linux-gnu \ @@ -77,7 +52,7 @@ // RUN: clang-linker-wrapper --host-triple=x86_64-unknown-linux-gnu --dry-run \ // RUN: --linker-path=/usr/bin/ld.lld -- %t.o -o a.out 2>&1 | FileCheck %s --check-prefix=CPU-LINK -// CPU-LINK: ld.lld{{.*}}-m elf_x86_64 -shared -Bsymbolic -o {{.*}}.img {{.*}}.o {{.*}}.o +// CPU-LINK: clang{{.*}} -o {{.*}}.img --target=x86_64-unknown-linux-gnu -march=native -O2 -Wl,--no-undefined -Bsymbolic -shared {{.*}}.o {{.*}}.o // RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t.o // RUN: clang-linker-wrapper --dry-run --host-triple=x86_64-unknown-linux-gnu -mllvm -openmp-opt-disable \ @@ -86,25 +61,6 @@ // HOST-LINK: ld.lld{{.*}}-a -b -c {{.*}}.o -o a.out // HOST-LINK-NOT: ld.lld{{.*}}-abc -// RUN: clang-offload-packager -o %t.out \ -// RUN: --image=file=%t.nvptx.bc,kind=openmp,triple=nvptx64-nvidia-cuda,arch=sm_70 \ -// RUN: --image=file=%t.nvptx.bc,kind=openmp,triple=nvptx64-nvidia-cuda,arch=sm_70 -// RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t.o -fembed-offload-object=%t.out -// RUN: clang-linker-wrapper --host-triple=x86_64-unknown-linux-gnu --dry-run \ -// RUN: --linker-path=/usr/bin/ld -- %t.o -o a.out 2>&1 | FileCheck %s --check-prefix=LTO - -// LTO: ptxas{{.*}}-m64 -o {{.*}}.cubin -O2 --gpu-name sm_70 {{.*}}.s -// LTO-NOT: nvlink - -// RUN: clang-offload-packager -o %t.out \ -// RUN: --image=file=%t.elf.o,kind=openmp,triple=nvptx64-nvidia-cuda,arch=sm_70 \ -// RUN: --image=file=%t.elf.o,kind=cuda,triple=nvptx64-nvidia-cuda,arch=sm_70 -// RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t.o -fembed-offload-object=%t.out -// RUN: clang-linker-wrapper --host-triple=x86_64-unknown-linux-gnu --dry-run \ -// RUN: --linker-path=/usr/bin/ld -- %t.o -o a.out 2>&1 | FileCheck %s --check-prefix=CUDA-OMP-LINK - -// CUDA-OMP-LINK: nvlink{{.*}}-m64 -o {{.*}}.img -arch sm_70 {{.*}}.o {{.*}}.o - // RUN: clang-offload-packager -o %t-lib.out \ // RUN: --image=file=%t.elf.o,kind=openmp,triple=nvptx64-nvidia-cuda,arch=sm_70 \ // RUN: --image=file=%t.elf.o,kind=cuda,triple=nvptx64-nvidia-cuda,arch=sm_52 @@ -116,8 +72,8 @@ // RUN: clang-linker-wrapper --host-triple=x86_64-unknown-linux-gnu --dry-run \ // RUN: --linker-path=/usr/bin/ld -- %t.a %t-obj.o -o a.out 2>&1 | FileCheck %s --check-prefix=STATIC-LIBRARY -// STATIC-LIBRARY: nvlink{{.*}} -arch sm_70 -// STATIC-LIBRARY-NOT: nvlink{{.*}} -arch sm_50 +// STATIC-LIBRARY: clang{{.*}} -march=sm_70 +// STATIC-LIBRARY-NOT: clang{{.*}} -march=sm_50 // RUN: clang-offload-packager -o %t.out \ // RUN: --image=file=%t.elf.o,kind=cuda,triple=nvptx64-nvidia-cuda,arch=sm_70 \ @@ -128,9 +84,9 @@ // RUN: clang-linker-wrapper --dry-run --host-triple=x86_64-unknown-linux-gnu \ // RUN: --linker-path=/usr/bin/ld -- %t.o -o a.out 2>&1 | FileCheck %s --check-prefix=CUDA -// CUDA: nvlink{{.*}}-m64 -o {{.*}}.img -arch sm_52 {{.*}}.o -// CUDA: nvlink{{.*}}-m64 -o {{.*}}.img -arch sm_70 {{.*}}.o {{.*}}.o -// CUDA: fatbinary{{.*}}-64 --create {{.*}}.fatbin --image=profile=sm_70,file={{.*}}.img --image=profile=sm_52,file={{.*}}.img +// CUDA: clang{{.*}} -o [[IMG_SM52:.+]] --target=nvptx64-nvidia-cuda -march=sm_52 +// CUDA: clang{{.*}} -o [[IMG_SM70:.+]] --target=nvptx64-nvidia-cuda -march=sm_70 +// CUDA: fatbinary{{.*}}-64 --create {{.*}}.fatbin --image=profile=sm_70,file=[[IMG_SM70]] --image=profile=sm_52,file=[[IMG_SM52]] // RUN: clang-offload-packager -o %t.out \ // RUN: --image=file=%t.elf.o,kind=cuda,triple=nvptx64-nvidia-cuda,arch=sm_80 \ @@ -153,9 +109,9 @@ // RUN: clang-linker-wrapper --dry-run --host-triple=x86_64-unknown-linux-gnu \ // RUN: --linker-path=/usr/bin/ld -- %t.o -o a.out 2>&1 | FileCheck %s --check-prefix=HIP -// HIP: lld{{.*}}-flavor gnu --no-undefined -shared -plugin-opt=-amdgpu-internalize-symbols -plugin-opt=mcpu=gfx908 -o {{.*}}.img {{.*}}.o -// HIP: lld{{.*}}-flavor gnu --no-undefined -shared -plugin-opt=-amdgpu-internalize-symbols -plugin-opt=mcpu=gfx90a -o {{.*}}.img {{.*}}.o -// HIP: clang-offload-bundler{{.*}}-type=o -bundle-align=4096 -targets=host-x86_64-unknown-linux,hipv4-amdgcn-amd-amdhsa--gfx90a,hipv4-amdgcn-amd-amdhsa--gfx908 -input=/dev/null -input={{.*}}.img -input={{.*}}.img -output={{.*}}.hipfb +// HIP: clang{{.*}} -o [[IMG_GFX908:.+]] --target=amdgcn-amd-amdhsa -mcpu=gfx908 +// HIP: clang{{.*}} -o [[IMG_GFX90A:.+]] --target=amdgcn-amd-amdhsa -mcpu=gfx90a +// HIP: clang-offload-bundler{{.*}}-type=o -bundle-align=4096 -targets=host-x86_64-unknown-linux,hipv4-amdgcn-amd-amdhsa--gfx90a,hipv4-amdgcn-amd-amdhsa--gfx908 -input=/dev/null -input=[[IMG_GFX90A]] -input=[[IMG_GFX908]] -output={{.*}}.hipfb // RUN: clang-offload-packager -o %t.out \ // RUN: --image=file=%t.elf.o,kind=openmp,triple=amdgcn-amd-amdhsa,arch=gfx908 \ @@ -166,8 +122,8 @@ // RUN: --linker-path=/usr/bin/ld --device-linker=a --device-linker=nvptx64-nvidia-cuda=b -- \ // RUN: %t.o -o a.out 2>&1 | FileCheck %s --check-prefix=LINKER-ARGS -// LINKER-ARGS: lld{{.*}}-flavor gnu --no-undefined -shared -plugin-opt=-amdgpu-internalize-symbols -plugin-opt=mcpu=gfx908 -o {{.*}}.img {{.*}}.o a -// LINKER-ARGS: nvlink{{.*}}-m64 -o {{.*}}.img -arch sm_70 {{.*}}.o a b +// LINKER-ARGS: clang{{.*}}--target=amdgcn-amd-amdhsa{{.*}}-Wl,a +// LINKER-ARGS: clang{{.*}}--target=nvptx64-nvidia-cuda{{.*}}-Wl,a -Wl,b // RUN: not clang-linker-wrapper --dry-run --host-triple=x86_64-unknown-linux-gnu -ldummy \ // RUN: --linker-path=/usr/bin/ld --device-linker=a --device-linker=nvptx64-nvidia-cuda=b -- \ diff --git a/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp b/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp --- a/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp +++ b/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp @@ -271,99 +271,13 @@ } namespace nvptx { -Expected assemble(StringRef InputFile, const ArgList &Args, - bool RDC = true) { - llvm::TimeTraceScope TimeScope("NVPTX Assembler"); - // NVPTX uses the ptxas binary to create device object files. - Expected PtxasPath = findProgram("ptxas", {CudaBinaryPath}); - if (!PtxasPath) - return PtxasPath.takeError(); - - const llvm::Triple Triple(Args.getLastArgValue(OPT_triple_EQ)); - StringRef Arch = Args.getLastArgValue(OPT_arch_EQ); - // Create a new file to write the linked device image to. Assume that the - // input filename already has the device and architecture. - auto TempFileOrErr = createOutputFile(sys::path::stem(InputFile), "cubin"); - if (!TempFileOrErr) - return TempFileOrErr.takeError(); - - SmallVector CmdArgs; - StringRef OptLevel = Args.getLastArgValue(OPT_opt_level, "O2"); - CmdArgs.push_back(*PtxasPath); - CmdArgs.push_back(Triple.isArch64Bit() ? "-m64" : "-m32"); - if (Verbose) - CmdArgs.push_back("-v"); - for (StringRef Arg : Args.getAllArgValues(OPT_ptxas_arg)) - CmdArgs.push_back(Args.MakeArgString(Arg)); - CmdArgs.push_back("-o"); - CmdArgs.push_back(*TempFileOrErr); - CmdArgs.push_back(Args.MakeArgString("-" + OptLevel)); - CmdArgs.push_back("--gpu-name"); - CmdArgs.push_back(Arch); - if (Args.hasArg(OPT_debug) && OptLevel[1] == '0') - CmdArgs.push_back("-g"); - else if (Args.hasArg(OPT_debug)) - CmdArgs.push_back("-lineinfo"); - if (RDC) - CmdArgs.push_back("-c"); - - CmdArgs.push_back(InputFile); - - if (Error Err = executeCommands(*PtxasPath, CmdArgs)) - return std::move(Err); - - return *TempFileOrErr; -} - -Expected link(ArrayRef InputFiles, const ArgList &Args) { - llvm::TimeTraceScope TimeScope("NVPTX linker"); - // NVPTX uses the nvlink binary to link device object files. - Expected NvlinkPath = findProgram("nvlink", {CudaBinaryPath}); - if (!NvlinkPath) - return NvlinkPath.takeError(); - - const llvm::Triple Triple(Args.getLastArgValue(OPT_triple_EQ)); - StringRef Arch = Args.getLastArgValue(OPT_arch_EQ); - - // Create a new file to write the linked device image to. - auto TempFileOrErr = - createOutputFile(sys::path::filename(ExecutableName) + "." + - Triple.getArchName() + "." + Arch, - "img"); - if (!TempFileOrErr) - return TempFileOrErr.takeError(); - - SmallVector CmdArgs; - CmdArgs.push_back(*NvlinkPath); - CmdArgs.push_back(Triple.isArch64Bit() ? "-m64" : "-m32"); - if (Args.hasArg(OPT_debug)) - CmdArgs.push_back("-g"); - if (Verbose) - CmdArgs.push_back("-v"); - CmdArgs.push_back("-o"); - CmdArgs.push_back(*TempFileOrErr); - CmdArgs.push_back("-arch"); - CmdArgs.push_back(Arch); - - // Add extracted input files. - for (StringRef Input : InputFiles) - CmdArgs.push_back(Input); - - for (StringRef Arg : Args.getAllArgValues(OPT_linker_arg_EQ)) - CmdArgs.push_back(Args.MakeArgString(Arg)); - if (Error Err = executeCommands(*NvlinkPath, CmdArgs)) - return std::move(Err); - - return *TempFileOrErr; -} - Expected fatbinary(ArrayRef> InputFiles, const ArgList &Args) { llvm::TimeTraceScope TimeScope("NVPTX fatbinary"); // NVPTX uses the fatbinary program to bundle the linked images. Expected FatBinaryPath = - findProgram("fatbinary", {CudaBinaryPath}); + findProgram("fatbinary", {CudaBinaryPath + "/bin"}); if (!FatBinaryPath) return FatBinaryPath.takeError(); @@ -393,49 +307,6 @@ } // namespace nvptx namespace amdgcn { -Expected link(ArrayRef InputFiles, const ArgList &Args) { - llvm::TimeTraceScope TimeScope("AMDGPU linker"); - // AMDGPU uses lld to link device object files. - Expected LLDPath = - findProgram("lld", {getMainExecutable("lld")}); - if (!LLDPath) - return LLDPath.takeError(); - - const llvm::Triple Triple(Args.getLastArgValue(OPT_triple_EQ)); - StringRef Arch = Args.getLastArgValue(OPT_arch_EQ); - - // Create a new file to write the linked device image to. - auto TempFileOrErr = - createOutputFile(sys::path::filename(ExecutableName) + "." + - Triple.getArchName() + "." + Arch, - "img"); - if (!TempFileOrErr) - return TempFileOrErr.takeError(); - std::string ArchArg = ("-plugin-opt=mcpu=" + Arch).str(); - - SmallVector CmdArgs; - CmdArgs.push_back(*LLDPath); - CmdArgs.push_back("-flavor"); - CmdArgs.push_back("gnu"); - CmdArgs.push_back("--no-undefined"); - CmdArgs.push_back("-shared"); - CmdArgs.push_back("-plugin-opt=-amdgpu-internalize-symbols"); - CmdArgs.push_back(ArchArg); - CmdArgs.push_back("-o"); - CmdArgs.push_back(*TempFileOrErr); - - // Add extracted input files. - for (StringRef Input : InputFiles) - CmdArgs.push_back(Input); - - for (StringRef Arg : Args.getAllArgValues(OPT_linker_arg_EQ)) - CmdArgs.push_back(Args.MakeArgString(Arg)); - if (Error Err = executeCommands(*LLDPath, CmdArgs)) - return std::move(Err); - - return *TempFileOrErr; -} - Expected fatbinary(ArrayRef> InputFiles, const ArgList &Args) { @@ -483,35 +354,9 @@ } // namespace amdgcn namespace generic { - -const char *getLDMOption(const llvm::Triple &T) { - switch (T.getArch()) { - case llvm::Triple::x86: - if (T.isOSIAMCU()) - return "elf_iamcu"; - return "elf_i386"; - case llvm::Triple::aarch64: - return "aarch64linux"; - case llvm::Triple::aarch64_be: - return "aarch64linuxb"; - case llvm::Triple::ppc64: - return "elf64ppc"; - case llvm::Triple::ppc64le: - return "elf64lppc"; - case llvm::Triple::x86_64: - if (T.isX32()) - return "elf32_x86_64"; - return "elf_x86_64"; - case llvm::Triple::ve: - return "elf64ve"; - default: - return nullptr; - } -} - -Expected assemble(StringRef InputFile, const ArgList &Args) { - llvm::TimeTraceScope TimeScope("Clang Assembler"); - // Use `clang` to invoke the generic assembler. +Expected clang(ArrayRef InputFiles, const ArgList &Args) { + llvm::TimeTraceScope TimeScope("Clang"); + // Use `clang` to invoke the appropriate device tools. Expected ClangPath = findProgram("clang", {getMainExecutable("clang")}); if (!ClangPath) @@ -519,9 +364,14 @@ const llvm::Triple Triple(Args.getLastArgValue(OPT_triple_EQ)); StringRef Arch = Args.getLastArgValue(OPT_arch_EQ); + if (Arch.empty()) + Arch = "native"; // Create a new file to write the linked device image to. Assume that the // input filename already has the device and architecture. - auto TempFileOrErr = createOutputFile(sys::path::stem(InputFile), "o"); + auto TempFileOrErr = + createOutputFile(sys::path::filename(ExecutableName) + "." + + Triple.getArchName() + "." + Arch, + "img"); if (!TempFileOrErr) return TempFileOrErr.takeError(); @@ -530,65 +380,47 @@ *ClangPath, "-o", *TempFileOrErr, - "-fPIC", - "-c", Args.MakeArgString("--target=" + Triple.getTriple()), - Args.MakeArgString("-" + OptLevel), Triple.isAMDGPU() ? Args.MakeArgString("-mcpu=" + Arch) : Args.MakeArgString("-march=" + Arch), - InputFile, + Args.MakeArgString("-" + OptLevel), + "-Wl,--no-undefined", }; - if (Error Err = executeCommands(*ClangPath, CmdArgs)) - return std::move(Err); + // If this is CPU offloading we copy the input libraries. + if (!Triple.isAMDGPU() && !Triple.isNVPTX()) { + CmdArgs.push_back("-Bsymbolic"); + CmdArgs.push_back("-shared"); + ArgStringList LinkerArgs; + for (const opt::Arg *Arg : + Args.filtered(OPT_library, OPT_rpath, OPT_library_path)) + Arg->render(Args, LinkerArgs); + llvm::copy(LinkerArgs, std::back_inserter(CmdArgs)); + } - return *TempFileOrErr; -} + if (Args.hasArg(OPT_debug)) + CmdArgs.push_back("-g"); -Expected link(ArrayRef InputFiles, const ArgList &Args) { - llvm::TimeTraceScope TimeScope("Generic linker"); - const llvm::Triple Triple(Args.getLastArgValue(OPT_triple_EQ)); - StringRef Arch = Args.getLastArgValue(OPT_arch_EQ); + if (SaveTemps) + CmdArgs.push_back("-save-temps"); - // Create a new file to write the linked device image to. - auto TempFileOrErr = - createOutputFile(sys::path::filename(ExecutableName) + "." + - Triple.getArchName() + "." + Arch, - "img"); - if (!TempFileOrErr) - return TempFileOrErr.takeError(); + if (Verbose) + CmdArgs.push_back("-v"); - // Use the host linker to perform generic offloading. Use the same libraries - // and paths as the host application does. - SmallVector CmdArgs; - CmdArgs.push_back(Args.getLastArgValue(OPT_linker_path_EQ)); - CmdArgs.push_back("-m"); - CmdArgs.push_back(getLDMOption(Triple)); - CmdArgs.push_back("-shared"); + if (!CudaBinaryPath.empty()) + CmdArgs.push_back(Args.MakeArgString("--cuda-path=" + CudaBinaryPath)); - ArgStringList LinkerArgs; - for (const opt::Arg *Arg : Args) { - auto Op = Arg->getOption(); - if (Op.matches(OPT_library) || Op.matches(OPT_library_path) || - Op.matches(OPT_as_needed) || Op.matches(OPT_no_as_needed) || - Op.matches(OPT_rpath) || Op.matches(OPT_dynamic_linker)) - Arg->render(Args, LinkerArgs); - } - for (StringRef Arg : LinkerArgs) - CmdArgs.push_back(Arg); + for (StringRef Arg : Args.getAllArgValues(OPT_ptxas_arg)) + llvm::copy(SmallVector({"-Xcuda-ptxas", Arg}), + std::back_inserter(CmdArgs)); - CmdArgs.push_back("-Bsymbolic"); - CmdArgs.push_back("-o"); - CmdArgs.push_back(*TempFileOrErr); + for (StringRef Arg : Args.getAllArgValues(OPT_linker_arg_EQ)) + CmdArgs.push_back(Args.MakeArgString("-Wl," + Arg)); - // Add extracted input files. - for (StringRef Input : InputFiles) - CmdArgs.push_back(Input); + for (StringRef InputFile : InputFiles) + CmdArgs.push_back(InputFile); - for (StringRef Arg : Args.getAllArgValues(OPT_linker_arg_EQ)) - CmdArgs.push_back(Args.MakeArgString(Arg)); - if (Error Err = - executeCommands(Args.getLastArgValue(OPT_linker_path_EQ), CmdArgs)) + if (Error Err = executeCommands(*ClangPath, CmdArgs)) return std::move(Err); return *TempFileOrErr; @@ -601,16 +433,14 @@ switch (Triple.getArch()) { case Triple::nvptx: case Triple::nvptx64: - return nvptx::link(InputFiles, Args); case Triple::amdgcn: - return amdgcn::link(InputFiles, Args); case Triple::x86: case Triple::x86_64: case Triple::aarch64: case Triple::aarch64_be: case Triple::ppc64: case Triple::ppc64le: - return generic::link(InputFiles, Args); + return generic::clang(InputFiles, Args); default: return createStringError(inconvertibleErrorCode(), Triple.getArchName() + @@ -933,19 +763,6 @@ return Error::success(); } - // Is we are compiling for NVPTX we need to run the assembler first. - if (Triple.isNVPTX() || SaveTemps) { - for (StringRef &File : Files) { - - auto FileOrErr = Triple.isNVPTX() - ? nvptx::assemble(File, Args, !SingleOutput) - : generic::assemble(File, Args); - if (!FileOrErr) - return FileOrErr.takeError(); - File = *FileOrErr; - } - } - // Append the new inputs to the device linker input. for (StringRef File : Files) OutputFiles.push_back(File); @@ -1226,12 +1043,9 @@ } // Link the remaining device files using the device linker. - llvm::Triple Triple(LinkerArgs.getLastArgValue(OPT_triple_EQ)); - bool RequiresLinking = - !Args.hasArg(OPT_embed_bitcode) && - !(Input.empty() && InputFiles.size() == 1 && Triple.isNVPTX()); - auto OutputOrErr = RequiresLinking ? linkDevice(InputFiles, LinkerArgs) - : InputFiles.front(); + auto OutputOrErr = !Args.hasArg(OPT_embed_bitcode) + ? linkDevice(InputFiles, LinkerArgs) + : InputFiles.front(); if (!OutputOrErr) return OutputOrErr.takeError(); @@ -1457,8 +1271,6 @@ SaveTemps = Args.hasArg(OPT_save_temps); ExecutableName = Args.getLastArgValue(OPT_o, "a.out"); CudaBinaryPath = Args.getLastArgValue(OPT_cuda_path_EQ).str(); - if (!CudaBinaryPath.empty()) - CudaBinaryPath = CudaBinaryPath + "/bin"; parallel::strategy = hardware_concurrency(1); if (auto *Arg = Args.getLastArg(OPT_wrapper_jobs)) {