diff --git a/clang/lib/Driver/CMakeLists.txt b/clang/lib/Driver/CMakeLists.txt --- a/clang/lib/Driver/CMakeLists.txt +++ b/clang/lib/Driver/CMakeLists.txt @@ -36,6 +36,7 @@ ToolChains/AIX.cpp ToolChains/Ananas.cpp ToolChains/AMDGPU.cpp + ToolChains/AMDGPUOpenMP.cpp ToolChains/AVR.cpp ToolChains/BareMetal.cpp ToolChains/Clang.cpp diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp --- a/clang/lib/Driver/Driver.cpp +++ b/clang/lib/Driver/Driver.cpp @@ -10,6 +10,7 @@ #include "InputInfo.h" #include "ToolChains/AIX.h" #include "ToolChains/AMDGPU.h" +#include "ToolChains/AMDGPUOpenMP.h" #include "ToolChains/AVR.h" #include "ToolChains/Ananas.h" #include "ToolChains/BareMetal.h" @@ -739,18 +740,27 @@ Diag(clang::diag::err_drv_invalid_omp_target) << Val; else { const ToolChain *TC; - // CUDA toolchains have to be selected differently. They pair host + // Device toolchains have to be selected differently. They pair host // and device in their implementation. - if (TT.isNVPTX()) { + if (TT.isNVPTX() || TT.isAMDGCN()) { const ToolChain *HostTC = C.getSingleOffloadToolChain(); assert(HostTC && "Host toolchain should be always defined."); - auto &CudaTC = + auto &DeviceTC = ToolChains[TT.str() + "/" + HostTC->getTriple().normalize()]; - if (!CudaTC) - CudaTC = std::make_unique( - *this, TT, *HostTC, C.getInputArgs(), Action::OFK_OpenMP); - TC = CudaTC.get(); + if (!DeviceTC) { + if (TT.isNVPTX()) + DeviceTC = std::make_unique( + *this, TT, *HostTC, C.getInputArgs(), Action::OFK_OpenMP); + else if (TT.isAMDGCN()) + DeviceTC = + std::make_unique( + *this, TT, *HostTC, C.getInputArgs()); + else + assert(DeviceTC && "Device toolchain not defined."); + } + + TC = DeviceTC.get(); } else TC = &getToolChain(C.getInputArgs(), TT); C.addOffloadDeviceToolChain(TC, Action::OFK_OpenMP); diff --git a/clang/lib/Driver/ToolChains/AMDGPU.h b/clang/lib/Driver/ToolChains/AMDGPU.h --- a/clang/lib/Driver/ToolChains/AMDGPU.h +++ b/clang/lib/Driver/ToolChains/AMDGPU.h @@ -64,6 
+64,13 @@ bool IsIntegratedAssemblerDefault() const override { return true; } bool IsMathErrnoDefault() const override { return false; } + bool useIntegratedAs() const override { return true; } + bool isCrossCompiling() const override { return true; } + bool isPICDefault() const override { return false; } + bool isPIEDefault() const override { return false; } + bool isPICDefaultForced() const override { return false; } + bool SupportsProfiling() const override { return false; } + llvm::opt::DerivedArgList * TranslateArgs(const llvm::opt::DerivedArgList &Args, StringRef BoundArch, Action::OffloadKind DeviceOffloadKind) const override; diff --git a/clang/lib/Driver/ToolChains/AMDGPUOpenMP.h b/clang/lib/Driver/ToolChains/AMDGPUOpenMP.h new file mode 100644 --- /dev/null +++ b/clang/lib/Driver/ToolChains/AMDGPUOpenMP.h @@ -0,0 +1,106 @@ +//===- AMDGPUOpenMP.h - AMDGPUOpenMP ToolChain Implementation -*- C++ -*---===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_LIB_DRIVER_TOOLCHAINS_AMDGPUOPENMP_H +#define LLVM_CLANG_LIB_DRIVER_TOOLCHAINS_AMDGPUOPENMP_H + +#include "AMDGPU.h" +#include "clang/Driver/Tool.h" +#include "clang/Driver/ToolChain.h" + +namespace clang { +namespace driver { + +namespace tools { + +namespace AMDGCN { +// Runs llvm-link/opt/llc/lld, which links multiple LLVM bitcode, together with +// device library, then compiles it to ISA in a shared object. 
+class LLVM_LIBRARY_VISIBILITY OpenMPLinker : public Tool {
+public:
+  OpenMPLinker(const ToolChain &TC)
+      : Tool("AMDGCN::OpenMPLinker", "amdgcn-link", TC) {}
+
+  bool hasIntegratedCPP() const override { return false; }
+
+  void ConstructJob(Compilation &C, const JobAction &JA,
+                    const InputInfo &Output, const InputInfoList &Inputs,
+                    const llvm::opt::ArgList &TCArgs,
+                    const char *LinkingOutput) const override;
+
+private:
+  /// Adds the llvm-link job over the file inputs. \return its output file name.
+  const char *constructLLVMLinkCommand(Compilation &C, const JobAction &JA,
+                                       const InputInfoList &Inputs,
+                                       const llvm::opt::ArgList &Args,
+                                       llvm::StringRef SubArchName,
+                                       llvm::StringRef OutputFilePrefix) const;
+
+  /// Adds the llc job reading \p InputFileName. \return llc output file name.
+  const char *constructLlcCommand(Compilation &C, const JobAction &JA,
+                                  const InputInfoList &Inputs,
+                                  const llvm::opt::ArgList &Args,
+                                  llvm::StringRef SubArchName,
+                                  llvm::StringRef OutputFilePrefix,
+                                  const char *InputFileName,
+                                  bool OutputIsAsm = false) const;
+  /// Adds the lld job that links \p InputFileName into \p Output.
+  void constructLldCommand(Compilation &C, const JobAction &JA,
+                           const InputInfoList &Inputs, const InputInfo &Output,
+                           const llvm::opt::ArgList &Args,
+                           const char *InputFileName) const;
+};
+
+} // end namespace AMDGCN
+} // end namespace tools
+
+namespace toolchains {
+/// amdgcn device toolchain for OpenMP offloading, paired with a host one.
+class LLVM_LIBRARY_VISIBILITY AMDGPUOpenMPToolChain final
+    : public ROCMToolChain {
+public:
+  AMDGPUOpenMPToolChain(const Driver &D, const llvm::Triple &Triple,
+                        const ToolChain &HostTC,
+                        const llvm::opt::ArgList &Args);
+
+  const llvm::Triple *getAuxTriple() const override {
+    return &HostTC.getTriple();
+  }
+
+  llvm::opt::DerivedArgList *
+  TranslateArgs(const llvm::opt::DerivedArgList &Args, StringRef BoundArch,
+                Action::OffloadKind DeviceOffloadKind) const override;
+  void
+  addClangTargetOptions(const llvm::opt::ArgList &DriverArgs,
+                        llvm::opt::ArgStringList &CC1Args,
+                        Action::OffloadKind DeviceOffloadKind) const override;
+  void addClangWarningOptions(llvm::opt::ArgStringList &CC1Args) const override;
+  CXXStdlibType GetCXXStdlibType(const llvm::opt::ArgList &Args) const override;
+  void
+  AddClangSystemIncludeArgs(const llvm::opt::ArgList &DriverArgs,
+                            llvm::opt::ArgStringList &CC1Args) const override;
+  void AddIAMCUIncludeArgs(const llvm::opt::ArgList &DriverArgs,
+                           llvm::opt::ArgStringList &CC1Args) const override;
+
+  SanitizerMask getSupportedSanitizers() const override;
+
+  VersionTuple
+  computeMSVCVersion(const Driver *D,
+                     const llvm::opt::ArgList &Args) const override;
+  /// Host toolchain this device toolchain was created for.
+  const ToolChain &HostTC;
+
+protected:
+  Tool *buildLinker() const override;
+};
+
+} // end namespace toolchains
+} // end namespace driver
+} // end namespace clang
+
+#endif // LLVM_CLANG_LIB_DRIVER_TOOLCHAINS_AMDGPUOPENMP_H
diff --git a/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp b/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
new file mode 100644
--- /dev/null
+++ b/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
@@ -0,0 +1,262 @@
+//===- AMDGPUOpenMP.cpp - AMDGPUOpenMP ToolChain Implementation -*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPUOpenMP.h"
+#include "AMDGPU.h"
+#include "CommonArgs.h"
+#include "InputInfo.h"
+#include "clang/Driver/Compilation.h"
+#include "clang/Driver/Driver.h"
+#include "clang/Driver/DriverDiagnostic.h"
+#include "clang/Driver/Options.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/Path.h"
+
+using namespace clang::driver;
+using namespace clang::driver::toolchains;
+using namespace clang::driver::tools;
+using namespace clang;
+using namespace llvm::opt;
+
+namespace {
+
+/// Picks the name of an intermediate file built from \p Base, \p Postfix and
+/// \p Extension. With -save-temps the name is "<Base><Postfix>.<Extension>";
+/// otherwise a temporary path is generated and registered with \p C as a
+/// temp file.
+static const char *getOutputFileName(Compilation &C, StringRef Base,
+                                     const char *Postfix,
+                                     const char *Extension) {
+  if (C.getDriver().isSaveTempsEnabled())
+    return C.getArgs().MakeArgString(Base.str() + Postfix + "." + Extension);
+  // Not saving temps: generate a temporary path and register it with C.
+  std::string TmpName =
+      C.getDriver().GetTemporaryPath(Base.str() + Postfix, Extension);
+  return C.addTempFile(C.getArgs().MakeArgString(TmpName));
+}
+
+/// Appends to \p CmdArgs the llc -O flag matching the last -O option in
+/// \p Args; nothing is appended when no -O option was given.
+static void addLLCOptArg(const llvm::opt::ArgList &Args,
+                         llvm::opt::ArgStringList &CmdArgs) {
+  if (Arg *A = Args.getLastArg(options::OPT_O_Group)) {
+    StringRef OOpt = "0"; // -O0 keeps this initial value.
+    if (A->getOption().matches(options::OPT_O4) ||
+        A->getOption().matches(options::OPT_Ofast))
+      OOpt = "3";
+    else if (A->getOption().matches(options::OPT_O)) {
+      // Clang and opt support -Os/-Oz; llc only supports -O0, -O1, -O2 and -O3
+      // so we map -Os/-Oz to -O2.
+      // Only clang supports -Og, and maps it to -O1.
+      // We map anything else to -O2.
+      OOpt = llvm::StringSwitch<const char *>(A->getValue())
+                 .Case("1", "1")
+                 .Case("2", "2")
+                 .Case("3", "3")
+                 .Case("s", "2")
+                 .Case("z", "2")
+                 .Case("g", "1")
+                 .Default("2");
+    }
+    CmdArgs.push_back(Args.MakeArgString("-O" + OOpt));
+  }
+}
+} // namespace
+
+const char *AMDGCN::OpenMPLinker::constructLLVMLinkCommand(
+    Compilation &C, const JobAction &JA, const InputInfoList &Inputs,
+    const ArgList &Args, StringRef SubArchName,
+    StringRef OutputFilePrefix) const {
+  ArgStringList CmdArgs;
+  // Only inputs that are files are handed to llvm-link.
+  for (const auto &II : Inputs)
+    if (II.isFilename())
+      CmdArgs.push_back(II.getFilename());
+  // Add an intermediate output file.
+  CmdArgs.push_back("-o");
+  const char *OutputFileName =
+      getOutputFileName(C, OutputFilePrefix, "-linked", "bc");
+  CmdArgs.push_back(OutputFileName);
+  const char *Exec =
+      Args.MakeArgString(getToolChain().GetProgramPath("llvm-link"));
+  C.addCommand(std::make_unique<Command>(
+      JA, *this, ResponseFileSupport::AtFileCurCP(), Exec, CmdArgs, Inputs,
+      InputInfo(&JA, Args.MakeArgString(OutputFileName))));
+  return OutputFileName;
+}
+
+const char *AMDGCN::OpenMPLinker::constructLlcCommand(
+    Compilation &C, const JobAction &JA, const InputInfoList &Inputs,
+    const llvm::opt::ArgList &Args, llvm::StringRef SubArchName,
+    llvm::StringRef OutputFilePrefix, const char *InputFileName,
+    bool OutputIsAsm) const {
+  // Construct llc command.
+  ArgStringList LlcArgs;
+  // The input to llc is the output from llvm-link.
+  LlcArgs.push_back(InputFileName);
+  // Pass optimization arg to llc.
+  addLLCOptArg(Args, LlcArgs);
+  LlcArgs.push_back("-mtriple=amdgcn-amd-amdhsa");
+  LlcArgs.push_back(Args.MakeArgString("-mcpu=" + SubArchName));
+  LlcArgs.push_back(
+      Args.MakeArgString(Twine("-filetype=") + (OutputIsAsm ? "asm" : "obj")));
+
+  // Forward every -mllvm value to llc.
+  for (const Arg *A : Args.filtered(options::OPT_mllvm))
+    LlcArgs.push_back(A->getValue(0));
+
+  // Add output filename
+  LlcArgs.push_back("-o");
+  const char *LlcOutputFile =
+      getOutputFileName(C, OutputFilePrefix, "", OutputIsAsm ? "s" : "o");
+  LlcArgs.push_back(LlcOutputFile);
+  const char *Llc = Args.MakeArgString(getToolChain().GetProgramPath("llc"));
+  C.addCommand(std::make_unique<Command>(
+      JA, *this, ResponseFileSupport::AtFileCurCP(), Llc, LlcArgs, Inputs,
+      InputInfo(&JA, Args.MakeArgString(LlcOutputFile))));
+  return LlcOutputFile;
+}
+
+void AMDGCN::OpenMPLinker::constructLldCommand(
+    Compilation &C, const JobAction &JA, const InputInfoList &Inputs,
+    const InputInfo &Output, const llvm::opt::ArgList &Args,
+    const char *InputFileName) const {
+  // Construct lld command.
+  // The output from ld.lld is an HSA code object file.
+  ArgStringList LldArgs{"-flavor", "gnu", "--no-undefined",
+                        "-shared", "-o", Output.getFilename(),
+                        InputFileName};
+
+  const char *Lld = Args.MakeArgString(getToolChain().GetProgramPath("lld"));
+  C.addCommand(std::make_unique<Command>(
+      JA, *this, ResponseFileSupport::AtFileCurCP(), Lld, LldArgs, Inputs,
+      InputInfo(&JA, Args.MakeArgString(Output.getFilename()))));
+}
+
+// For amdgcn the inputs of the linker job are device bitcode and output is
+// a shared code object. It runs the llvm-link, llc, then lld steps.
+void AMDGCN::OpenMPLinker::ConstructJob(Compilation &C, const JobAction &JA,
+                                        const InputInfo &Output,
+                                        const InputInfoList &Inputs,
+                                        const ArgList &Args,
+                                        const char *LinkingOutput) const {
+  assert(getToolChain().getTriple().isAMDGCN() && "Unsupported target");
+
+  StringRef GPUArch = Args.getLastArgValue(options::OPT_march_EQ);
+  assert(GPUArch.startswith("gfx") && "Unsupported sub arch");
+
+  // Prefix for temporary file names; the last file input determines it.
+  std::string Prefix;
+  for (const auto &II : Inputs)
+    if (II.isFilename())
+      Prefix =
+          llvm::sys::path::stem(II.getFilename()).str() + "-" + GPUArch.str();
+  assert(!Prefix.empty() && "no linker inputs are files");
+
+  // Each command outputs a different file that feeds the next command.
+  const char *LinkedBCFile =
+      constructLLVMLinkCommand(C, JA, Inputs, Args, GPUArch, Prefix);
+  const char *ObjectFile = constructLlcCommand(C, JA, Inputs, Args, GPUArch,
+                                               Prefix, LinkedBCFile);
+  constructLldCommand(C, JA, Inputs, Output, Args, ObjectFile);
+}
+
+AMDGPUOpenMPToolChain::AMDGPUOpenMPToolChain(const Driver &D,
+                                             const llvm::Triple &Triple,
+                                             const ToolChain &HostTC,
+                                             const ArgList &Args)
+    : ROCMToolChain(D, Triple, Args), HostTC(HostTC) {
+  // Look up binaries in the driver directory; this is used to
+  // discover the clang-offload-bundler executable.
+  getProgramPaths().push_back(getDriver().Dir);
+}
+
+void AMDGPUOpenMPToolChain::addClangTargetOptions(
+    const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args,
+    Action::OffloadKind DeviceOffloadingKind) const {
+  HostTC.addClangTargetOptions(DriverArgs, CC1Args, DeviceOffloadingKind);
+
+  StringRef GpuArch = DriverArgs.getLastArgValue(options::OPT_march_EQ);
+  assert(!GpuArch.empty() && "Must have an explicit GPU arch.");
+  assert(DeviceOffloadingKind == Action::OFK_OpenMP &&
+         "Only OpenMP offloading kinds are supported.");
+  // Target the requested GPU and emit LLVM bitcode.
+  CC1Args.push_back("-target-cpu");
+  CC1Args.push_back(DriverArgs.MakeArgStringRef(GpuArch));
+  CC1Args.push_back("-fcuda-is-device");
+  CC1Args.push_back("-emit-llvm-bc");
+}
+
+llvm::opt::DerivedArgList *AMDGPUOpenMPToolChain::TranslateArgs(
+    const llvm::opt::DerivedArgList &Args, StringRef BoundArch,
+    Action::OffloadKind DeviceOffloadKind) const {
+  DerivedArgList *DAL =
+      HostTC.TranslateArgs(Args, BoundArch, DeviceOffloadKind);
+  if (!DAL)
+    DAL = new DerivedArgList(Args.getBaseArgs());
+
+  const OptTable &Opts = getDriver().getOpts();
+
+  // Outside OpenMP offloading, carry over every original argument.
+  if (DeviceOffloadKind != Action::OFK_OpenMP)
+    for (Arg *A : Args)
+      DAL->append(A);
+
+  // Rewrite -march= to the architecture this job is bound to.
+  if (!BoundArch.empty()) {
+    DAL->eraseArg(options::OPT_march_EQ);
+    DAL->AddJoinedArg(nullptr, Opts.getOption(options::OPT_march_EQ),
+                      BoundArch);
+  }
+
+  return DAL;
+}
+
+Tool *AMDGPUOpenMPToolChain::buildLinker() const {
+  assert(getTriple().isAMDGCN());
+  return new tools::AMDGCN::OpenMPLinker(*this);
+}
+
+void AMDGPUOpenMPToolChain::addClangWarningOptions(
+    ArgStringList &CC1Args) const {
+  HostTC.addClangWarningOptions(CC1Args);
+}
+
+ToolChain::CXXStdlibType
+AMDGPUOpenMPToolChain::GetCXXStdlibType(const ArgList &Args) const {
+  return HostTC.GetCXXStdlibType(Args);
+}
+
+void AMDGPUOpenMPToolChain::AddClangSystemIncludeArgs(
+    const ArgList &DriverArgs, ArgStringList &CC1Args) const {
+  HostTC.AddClangSystemIncludeArgs(DriverArgs, CC1Args);
+}
+
+void AMDGPUOpenMPToolChain::AddIAMCUIncludeArgs(const ArgList &Args,
+                                                ArgStringList &CC1Args) const {
+  HostTC.AddIAMCUIncludeArgs(Args, CC1Args);
+}
+
+SanitizerMask AMDGPUOpenMPToolChain::getSupportedSanitizers() const {
+  // The AMDGPUOpenMPToolChain only supports sanitizers in the sense that it
+  // allows sanitizer arguments on the command line if they are supported by the
+  // host toolchain. The AMDGPUOpenMPToolChain will actually ignore any command
+  // line arguments for any of these "supported" sanitizers. That means that no
+  // sanitization of device code is actually supported at this time.
+  //
+  // This behavior is necessary because the host and device toolchain
+  // invocations often share the command line, so the device toolchain must
+  // tolerate flags meant only for the host toolchain.
+  return HostTC.getSupportedSanitizers();
+}
+
+VersionTuple
+AMDGPUOpenMPToolChain::computeMSVCVersion(const Driver *D,
+                                          const ArgList &Args) const {
+  return HostTC.computeMSVCVersion(D, Args);
+}
diff --git a/clang/lib/Driver/ToolChains/HIP.h b/clang/lib/Driver/ToolChains/HIP.h
--- a/clang/lib/Driver/ToolChains/HIP.h
+++ b/clang/lib/Driver/ToolChains/HIP.h
@@ -71,15 +71,6 @@
   void addClangTargetOptions(const llvm::opt::ArgList &DriverArgs,
                              llvm::opt::ArgStringList &CC1Args,
                              Action::OffloadKind DeviceOffloadKind) const override;
-
-  bool useIntegratedAs() const override { return true; }
-  bool isCrossCompiling() const override { return true; }
-  bool isPICDefault() const override { return false; }
-  bool isPIEDefault() const override { return false; }
-  bool isPICDefaultForced() const override { return false; }
-  bool SupportsProfiling() const override { return false; }
-  bool IsMathErrnoDefault() const override { return false; }
-
   void addClangWarningOptions(llvm::opt::ArgStringList &CC1Args) const override;
   CXXStdlibType GetCXXStdlibType(const llvm::opt::ArgList &Args) const override;
   void
diff --git a/clang/test/Driver/amdgpu-openmp-toolchain.c b/clang/test/Driver/amdgpu-openmp-toolchain.c
new file mode 100644
--- /dev/null
+++ b/clang/test/Driver/amdgpu-openmp-toolchain.c
@@ -0,0 +1,36 @@
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang -### --target=x86_64-unknown-linux-gnu -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa -march=gfx906 %s 2>&1 \
+// RUN: | FileCheck %s
+
+// verify the tools invocations
+// CHECK: clang{{.*}}"-cc1" "-triple" "x86_64-unknown-linux-gnu"{{.*}}"-x" "c"{{.*}}
+// CHECK: clang{{.*}}"-cc1" "-triple" "x86_64-unknown-linux-gnu"{{.*}}"-x" "ir"{{.*}}
+// CHECK: clang{{.*}}"-cc1"{{.*}}"-triple" "amdgcn-amd-amdhsa"{{.*}}"-target-cpu" "gfx906" "-fcuda-is-device" "-emit-llvm-bc"{{.*}}
+// CHECK: llvm-link{{.*}}"-o" "{{.*}}amdgpu-openmp-toolchain-{{.*}}-gfx906-linked-{{.*}}.bc"
+// CHECK: llc{{.*}}amdgpu-openmp-toolchain-{{.*}}-gfx906-linked-{{.*}}.bc" "-mtriple=amdgcn-amd-amdhsa" "-mcpu=gfx906" "-filetype=obj" "-o"{{.*}}amdgpu-openmp-toolchain-{{.*}}-gfx906-{{.*}}.o"
+// CHECK: lld{{.*}}"-flavor" "gnu" "--no-undefined" "-shared" "-o"{{.*}}amdgpu-openmp-toolchain-{{.*}}.out" "{{.*}}amdgpu-openmp-toolchain-{{.*}}-gfx906-{{.*}}.o"
+// CHECK: clang-offload-wrapper{{.*}}"-target" "x86_64-unknown-linux-gnu" "-o" "{{.*}}a-{{.*}}.bc" {{.*}}amdgpu-openmp-toolchain-{{.*}}.out"
+// CHECK: clang{{.*}}"-cc1" "-triple" "x86_64-unknown-linux-gnu"{{.*}}"-o" "{{.*}}a-{{.*}}.o" "-x" "ir" "{{.*}}a-{{.*}}.bc"
+// CHECK: ld{{.*}}"-o" "a.out"{{.*}}"{{.*}}amdgpu-openmp-toolchain-{{.*}}.o" "{{.*}}a-{{.*}}.o" "-lomp" "-lomptarget"
+
+// RUN: %clang -ccc-print-phases --target=x86_64-unknown-linux-gnu -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa -march=gfx906 %s 2>&1 \
+// RUN: | FileCheck --check-prefix=CHECK-PHASES %s
+// phases
+// CHECK-PHASES: 0: input, "{{.*}}amdgpu-openmp-toolchain.c", c, (host-openmp)
+// CHECK-PHASES: 1: preprocessor, {0}, cpp-output, (host-openmp)
+// CHECK-PHASES: 2: compiler, {1}, ir, (host-openmp)
+// CHECK-PHASES: 3: backend, {2}, assembler, (host-openmp)
+// CHECK-PHASES: 4: assembler, {3}, object, (host-openmp)
+// CHECK-PHASES: 5: input, "{{.*}}amdgpu-openmp-toolchain.c", c, (device-openmp)
+// CHECK-PHASES: 6: preprocessor, {5}, cpp-output, (device-openmp)
+// CHECK-PHASES: 7: compiler, {6}, ir, (device-openmp)
+// CHECK-PHASES: 8: offload, "host-openmp (x86_64-unknown-linux-gnu)" {2}, "device-openmp (amdgcn-amd-amdhsa)" {7}, ir
+// CHECK-PHASES: 9: backend, {8}, assembler, (device-openmp)
+// CHECK-PHASES: 10: assembler, {9}, object, (device-openmp)
+// CHECK-PHASES: 11: linker, {10}, image, (device-openmp)
+// CHECK-PHASES: 12: offload, "device-openmp (amdgcn-amd-amdhsa)" {11}, image
+// CHECK-PHASES: 13: clang-offload-wrapper, {12}, ir, (host-openmp)
+// CHECK-PHASES: 14: backend, {13}, assembler, (host-openmp)
+// CHECK-PHASES: 15: assembler, {14}, object, (host-openmp)
+// CHECK-PHASES: 16: linker, {4, 15}, image, (host-openmp)
+