Index: cfe/trunk/include/clang/Driver/ToolChain.h =================================================================== --- cfe/trunk/include/clang/Driver/ToolChain.h +++ cfe/trunk/include/clang/Driver/ToolChain.h @@ -40,6 +40,7 @@ class Compilation; class CudaInstallationDetector; class Driver; + class InputInfo; class JobAction; class RegisterEffectiveTriple; class SanitizerArgs; @@ -172,6 +173,11 @@ /// while the aux triple is the host (CPU) toolchain, e.g. x86-linux-gnu. virtual const llvm::Triple *getAuxTriple() const { return nullptr; } + /// Some toolchains need to modify the file name, for example to replace the + /// extension for object files with .cubin for OpenMP offloading to Nvidia + /// GPUs. + virtual std::string getInputFilename(const InputInfo &Input) const; + llvm::Triple::ArchType getArch() const { return Triple.getArch(); } StringRef getArchName() const { return Triple.getArchName(); } StringRef getPlatform() const { return Triple.getVendorName(); } Index: cfe/trunk/lib/Driver/ToolChain.cpp =================================================================== --- cfe/trunk/lib/Driver/ToolChain.cpp +++ cfe/trunk/lib/Driver/ToolChain.cpp @@ -215,6 +215,10 @@ } } +std::string ToolChain::getInputFilename(const InputInfo &Input) const { + return Input.getFilename(); +} + bool ToolChain::IsUnwindTablesDefault(const ArgList &Args) const { return false; } Index: cfe/trunk/lib/Driver/ToolChains/Clang.cpp =================================================================== --- cfe/trunk/lib/Driver/ToolChains/Clang.cpp +++ cfe/trunk/lib/Driver/ToolChains/Clang.cpp @@ -5310,12 +5310,15 @@ if (I) Triples += ','; + // Find ToolChain for this input. Action::OffloadKind CurKind = Action::OFK_Host; const ToolChain *CurTC = &getToolChain(); const Action *CurDep = JA.getInputs()[I]; if (const auto *OA = dyn_cast(CurDep)) { + CurTC = nullptr; OA->doOnEachDependence([&](Action *A, const ToolChain *TC, const char *) { + assert(CurTC == nullptr && "Expected one dependence!"); CurKind = A->getOffloadingDeviceKind(); CurTC = TC; }); @@ -5336,7 +5339,17 @@ for (unsigned I = 0; I < Inputs.size(); ++I) { if (I) UB += ','; - UB += Inputs[I].getFilename(); + + // Find ToolChain for this input. + const ToolChain *CurTC = &getToolChain(); + if (const auto *OA = dyn_cast(JA.getInputs()[I])) { + CurTC = nullptr; + OA->doOnEachDependence([&](Action *, const ToolChain *TC, const char *) { + assert(CurTC == nullptr && "Expected one dependence!"); + CurTC = TC; + }); + } + UB += CurTC->getInputFilename(Inputs[I]); } CmdArgs.push_back(TCArgs.MakeArgString(UB)); @@ -5396,13 +5409,7 @@ for (unsigned I = 0; I < Outputs.size(); ++I) { if (I) UB += ','; - SmallString<256> OutputFileName(Outputs[I].getFilename()); - // Change extension of target files for OpenMP offloading - // to NVIDIA GPUs. - if (DepInfo[I].DependentToolChain->getTriple().isNVPTX() && - JA.isOffloading(Action::OFK_OpenMP)) - llvm::sys::path::replace_extension(OutputFileName, "cubin"); - UB += OutputFileName; + UB += DepInfo[I].DependentToolChain->getInputFilename(Outputs[I]); } CmdArgs.push_back(TCArgs.MakeArgString(UB)); CmdArgs.push_back("-unbundle"); Index: cfe/trunk/lib/Driver/ToolChains/Cuda.h =================================================================== --- cfe/trunk/lib/Driver/ToolChains/Cuda.h +++ cfe/trunk/lib/Driver/ToolChains/Cuda.h @@ -137,10 +137,12 @@ const ToolChain &HostTC, const llvm::opt::ArgList &Args, const Action::OffloadKind OK); - virtual const llvm::Triple *getAuxTriple() const override { + const llvm::Triple *getAuxTriple() const override { return &HostTC.getTriple(); } + std::string getInputFilename(const InputInfo &Input) const override; + llvm::opt::DerivedArgList * TranslateArgs(const llvm::opt::DerivedArgList &Args, StringRef BoundArch, Action::OffloadKind DeviceOffloadKind) const override; Index: cfe/trunk/lib/Driver/ToolChains/Cuda.cpp =================================================================== --- cfe/trunk/lib/Driver/ToolChains/Cuda.cpp +++ cfe/trunk/lib/Driver/ToolChains/Cuda.cpp @@ -301,10 +301,7 @@ CmdArgs.push_back("--gpu-name"); CmdArgs.push_back(Args.MakeArgString(CudaArchToString(gpu_arch))); CmdArgs.push_back("--output-file"); - SmallString<256> OutputFileName(Output.getFilename()); - if (JA.isOffloading(Action::OFK_OpenMP)) - llvm::sys::path::replace_extension(OutputFileName, "cubin"); - CmdArgs.push_back(Args.MakeArgString(OutputFileName)); + CmdArgs.push_back(Args.MakeArgString(TC.getInputFilename(Output))); for (const auto& II : Inputs) CmdArgs.push_back(Args.MakeArgString(II.getFilename())); @@ -431,11 +428,8 @@ if (!II.isFilename()) continue; - SmallString<256> Name(II.getFilename()); - llvm::sys::path::replace_extension(Name, "cubin"); - - const char *CubinF = - C.addTempFile(C.getArgs().MakeArgString(Name)); + const char *CubinF = C.addTempFile( + C.getArgs().MakeArgString(getToolChain().getInputFilename(II))); CmdArgs.push_back(CubinF); } @@ -463,6 +457,20 @@ getProgramPaths().push_back(getDriver().Dir); } +std::string CudaToolChain::getInputFilename(const InputInfo &Input) const { + // Only object files are changed, for example assembly files keep their .s + // extensions. CUDA also continues to use .o as they don't use nvlink but + // fatbinary. + if (!(OK == Action::OFK_OpenMP && Input.getType() == types::TY_Object)) + return ToolChain::getInputFilename(Input); + + // Replace extension for object files with cubin because nvlink relies on + // these particular file names. + SmallString<256> Filename(ToolChain::getInputFilename(Input)); + llvm::sys::path::replace_extension(Filename, "cubin"); + return Filename.str(); +} + void CudaToolChain::addClangTargetOptions( const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args, Index: cfe/trunk/test/Driver/openmp-offload-gpu.c =================================================================== --- cfe/trunk/test/Driver/openmp-offload-gpu.c +++ cfe/trunk/test/Driver/openmp-offload-gpu.c @@ -10,7 +10,8 @@ /// ########################################################################### /// Check -Xopenmp-target uses one of the archs provided when several archs are used. -// RUN: %clang -### -no-canonical-prefixes -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_35 -Xopenmp-target -march=sm_60 %s 2>&1 \ +// RUN: %clang -### -no-canonical-prefixes -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda \ +// RUN: -Xopenmp-target -march=sm_35 -Xopenmp-target -march=sm_60 %s 2>&1 \ // RUN: | FileCheck -check-prefix=CHK-FOPENMP-TARGET-ARCHS %s // CHK-FOPENMP-TARGET-ARCHS: ptxas{{.*}}" "--gpu-name" "sm_60" @@ -19,7 +20,9 @@ /// ########################################################################### /// Check -Xopenmp-target -march=sm_35 works as expected when two triples are present. -// RUN: %clang -### -no-canonical-prefixes -fopenmp=libomp -fopenmp-targets=powerpc64le-ibm-linux-gnu,nvptx64-nvidia-cuda -Xopenmp-target=nvptx64-nvidia-cuda -march=sm_35 %s 2>&1 \ +// RUN: %clang -### -no-canonical-prefixes -fopenmp=libomp \ +// RUN: -fopenmp-targets=powerpc64le-ibm-linux-gnu,nvptx64-nvidia-cuda \ +// RUN: -Xopenmp-target=nvptx64-nvidia-cuda -march=sm_35 %s 2>&1 \ // RUN: | FileCheck -check-prefix=CHK-FOPENMP-TARGET-COMPILATION %s // CHK-FOPENMP-TARGET-COMPILATION: ptxas{{.*}}" "--gpu-name" "sm_35" @@ -28,43 +31,67 @@ /// ########################################################################### /// Check cubin file generation and usage by nvlink -// RUN: %clang -### -no-canonical-prefixes -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -save-temps %s 2>&1 \ -// RUN: | FileCheck -check-prefix=CHK-CUBIN %s +// RUN: %clang -### -no-canonical-prefixes -target powerpc64le-unknown-linux-gnu -fopenmp=libomp \ +// RUN: -fopenmp-targets=nvptx64-nvidia-cuda -save-temps %s 2>&1 \ +// RUN: | FileCheck -check-prefix=CHK-CUBIN-NVLINK %s +/// Check cubin file generation and usage by nvlink when toolchain has BindArchAction +// RUN: %clang -### -no-canonical-prefixes -target x86_64-apple-darwin17.0.0 -fopenmp=libomp \ +// RUN: -fopenmp-targets=nvptx64-nvidia-cuda %s 2>&1 \ +// RUN: | FileCheck -check-prefix=CHK-CUBIN-NVLINK %s + +// CHK-CUBIN-NVLINK: clang{{.*}}" "-o" "[[PTX:.*\.s]]" +// CHK-CUBIN-NVLINK-NEXT: ptxas{{.*}}" "--output-file" "[[CUBIN:.*\.cubin]]" {{.*}}"[[PTX]]" +// CHK-CUBIN-NVLINK-NEXT: nvlink{{.*}}" {{.*}}"[[CUBIN]]" + +/// ########################################################################### -// CHK-CUBIN: clang{{.*}}" "-o" "{{.*}}.s" -// CHK-CUBIN-NEXT: ptxas{{.*}}" "--output-file" {{.*}}.cubin" {{.*}}.s" -// CHK-CUBIN-NEXT: nvlink" {{.*}}.cubin" +/// Check unbundlink of assembly file, cubin file generation and usage by nvlink +// RUN: touch %t.s +// RUN: %clang -### -no-canonical-prefixes -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -save-temps %t.s 2>&1 \ +// RUN: | FileCheck -check-prefix=CHK-UNBUNDLING-PTXAS-CUBIN-NVLINK %s +/// Use DAG to ensure that assembly file has been unbundled. +// CHK-UNBUNDLING-PTXAS-CUBIN-NVLINK-DAG: ptxas{{.*}}" "--output-file" "[[CUBIN:.*\.cubin]]" {{.*}}"[[PTX:.*\.s]]" +// CHK-UNBUNDLING-PTXAS-CUBIN-NVLINK-DAG: clang-offload-bundler{{.*}}" "-type=s" {{.*}}"-outputs={{.*}}[[PTX]] +// CHK-UNBUNDLING-PTXAS-CUBIN-NVLINK-DAG-SAME: "-unbundle" +// CHK-UNBUNDLING-PTXAS-CUBIN-NVLINK: nvlink{{.*}}" {{.*}}"[[CUBIN]]" /// ########################################################################### -/// Check cubin file generation and usage by nvlink when toolchain has BindArchAction -// RUN: %clang -### -no-canonical-prefixes -target x86_64-apple-darwin17.0.0 -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda %s 2>&1 \ -// RUN: | FileCheck -check-prefix=CHK-CUBIN-DARWIN %s +/// Check cubin file generation and bundling +// RUN: %clang -### -no-canonical-prefixes -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -save-temps %s -c 2>&1 \ +// RUN: | FileCheck -check-prefix=CHK-PTXAS-CUBIN-BUNDLING %s -// CHK-CUBIN-DARWIN: clang{{.*}}" "-o" "{{.*}}.s" -// CHK-CUBIN-DARWIN-NEXT: ptxas{{.*}}" "--output-file" {{.*}}.cubin" {{.*}}.s" -// CHK-CUBIN-DARWIN-NEXT: nvlink" {{.*}}.cubin" +// CHK-PTXAS-CUBIN-BUNDLING: clang{{.*}}" "-o" "[[PTX:.*\.s]]" +// CHK-PTXAS-CUBIN-BUNDLING-NEXT: ptxas{{.*}}" "--output-file" "[[CUBIN:.*\.cubin]]" {{.*}}"[[PTX]]" +// CHK-PTXAS-CUBIN-BUNDLING: clang-offload-bundler{{.*}}" "-type=o" {{.*}}"-inputs={{.*}}[[CUBIN]] /// ########################################################################### -/// Check cubin file generation and usage by nvlink -// RUN: touch %t1.o -// RUN: touch %t2.o -// RUN: %clang -### -no-canonical-prefixes -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda %t1.o %t2.o 2>&1 \ -// RUN: | FileCheck -check-prefix=CHK-TWOCUBIN %s +/// Check cubin file unbundling and usage by nvlink +// RUN: touch %t.o +// RUN: %clang -### -no-canonical-prefixes -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -save-temps %t.o 2>&1 \ +// RUN: | FileCheck -check-prefix=CHK-CUBIN-UNBUNDLING-NVLINK %s -// CHK-TWOCUBIN: nvlink{{.*}}openmp-offload-{{.*}}.cubin" "{{.*}}openmp-offload-{{.*}}.cubin" +/// Use DAG to ensure that cubin file has been unbundled. +// CHK-CUBIN-UNBUNDLING-NVLINK-DAG: nvlink{{.*}}" {{.*}}"[[CUBIN:.*\.cubin]]" +// CHK-CUBIN-UNBUNDLING-NVLINK-DAG: clang-offload-bundler{{.*}}" "-type=o" {{.*}}"-outputs={{.*}}[[CUBIN]] +// CHK-CUBIN-UNBUNDLING-NVLINK-DAG-SAME: "-unbundle" /// ########################################################################### -/// Check cubin file generation and usage by nvlink when toolchain has BindArchAction +/// Check cubin file generation and usage by nvlink // RUN: touch %t1.o // RUN: touch %t2.o -// RUN: %clang -### -no-canonical-prefixes -target x86_64-apple-darwin17.0.0 -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda %t1.o %t2.o 2>&1 \ -// RUN: | FileCheck -check-prefix=CHK-TWOCUBIN-DARWIN %s +// RUN: %clang -### -no-canonical-prefixes -target powerpc64le-unknown-linux-gnu -fopenmp=libomp \ +// RUN: -fopenmp-targets=nvptx64-nvidia-cuda %t1.o %t2.o 2>&1 \ +// RUN: | FileCheck -check-prefix=CHK-TWOCUBIN %s +/// Check cubin file generation and usage by nvlink when toolchain has BindArchAction +// RUN: %clang -### -no-canonical-prefixes -target x86_64-apple-darwin17.0.0 -fopenmp=libomp \ +// RUN: -fopenmp-targets=nvptx64-nvidia-cuda %t1.o %t2.o 2>&1 \ +// RUN: | FileCheck -check-prefix=CHK-TWOCUBIN %s -// CHK-TWOCUBIN-DARWIN: nvlink{{.*}}openmp-offload-{{.*}}.cubin" "{{.*}}openmp-offload-{{.*}}.cubin" +// CHK-TWOCUBIN: nvlink{{.*}}openmp-offload-{{.*}}.cubin" "{{.*}}openmp-offload-{{.*}}.cubin" /// ########################################################################### @@ -77,7 +104,8 @@ /// ########################################################################### /// PTXAS is passed -c flag by default when offloading to an NVIDIA device using OpenMP - disable it. -// RUN: %clang -### -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -fnoopenmp-relocatable-target -save-temps -no-canonical-prefixes %s 2>&1 \ +// RUN: %clang -### -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -fnoopenmp-relocatable-target \ +// RUN: -save-temps -no-canonical-prefixes %s 2>&1 \ // RUN: | FileCheck -check-prefix=CHK-PTXAS-NORELO %s // CHK-PTXAS-NORELO-NOT: ptxas{{.*}}" "-c" @@ -86,7 +114,8 @@ /// PTXAS is passed -c flag by default when offloading to an NVIDIA device using OpenMP /// Check that the flag is passed when -fopenmp-relocatable-target is used. -// RUN: %clang -### -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -fopenmp-relocatable-target -save-temps -no-canonical-prefixes %s 2>&1 \ +// RUN: %clang -### -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -fopenmp-relocatable-target \ +// RUN: -save-temps -no-canonical-prefixes %s 2>&1 \ // RUN: | FileCheck -check-prefix=CHK-PTXAS-RELO %s // CHK-PTXAS-RELO: ptxas{{.*}}" "-c"