Changeset View
Changeset View
Standalone View
Standalone View
clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
Show All 25 Lines | |||||
using namespace clang::driver; | using namespace clang::driver; | ||||
using namespace clang::driver::toolchains; | using namespace clang::driver::toolchains; | ||||
using namespace clang::driver::tools; | using namespace clang::driver::tools; | ||||
using namespace clang; | using namespace clang; | ||||
using namespace llvm::opt; | using namespace llvm::opt; | ||||
namespace { | namespace { | ||||
/// Pick the name of an intermediate file produced by one of the device link
/// steps.
///
/// When save-temps is enabled the name is "<Base><Postfix>.<Extension>".
/// Otherwise the driver is asked for a temporary path using "<Base><Postfix>"
/// as prefix and \p Extension as suffix, and that path is registered with the
/// compilation through addTempFile.
///
/// \param C         compilation that supplies the driver and argument list.
/// \param Base      leading part of the file name.
/// \param Postfix   text appended directly after \p Base.
/// \param Extension file extension, without the leading dot.
/// \returns the chosen file name.
static const char *getOutputFileName(Compilation &C, StringRef Base,
                                     const char *Postfix,
                                     const char *Extension) {
  const std::string Stem = Base.str() + Postfix;

  // Save-temps: use a predictable name and do not register a temp file.
  if (C.getDriver().isSaveTempsEnabled())
    return C.getArgs().MakeArgString(Stem + "." + Extension);

  const std::string TempPath = C.getDriver().GetTemporaryPath(Stem, Extension);
  return C.addTempFile(C.getArgs().MakeArgString(TempPath));
}
/// Translate the last driver option of the -O group into an "-O<level>" flag
/// and append it to \p CmdArgs. Nothing is appended when no such option was
/// given.
///
/// Level mapping:
///   -O4, -Ofast        -> 3
///   -O0                -> 0
///   -O1 / -O2 / -O3    -> unchanged
///   -Os, -Oz           -> 2 (llc only supports -O0, -O1, -O2 and -O3)
///   -Og                -> 1 (only clang supports -Og, and maps it to -O1)
///   any other -O value -> 0
static void addLLCOptArg(const llvm::opt::ArgList &Args,
                         llvm::opt::ArgStringList &CmdArgs) {
  Arg *OptArg = Args.getLastArg(options::OPT_O_Group);
  if (!OptArg)
    return;

  const auto &Opt = OptArg->getOption();
  StringRef Level = "0";
  if (Opt.matches(options::OPT_O4) || Opt.matches(options::OPT_Ofast)) {
    Level = "3";
  } else if (Opt.matches(options::OPT_O0)) {
    Level = "0";
  } else if (Opt.matches(options::OPT_O)) {
    // Generic -O<value>: fold the size/debug levels onto what llc accepts.
    Level = llvm::StringSwitch<const char *>(OptArg->getValue())
                .Case("1", "1")
                .Case("g", "1")
                .Case("2", "2")
                .Case("s", "2")
                .Case("z", "2")
                .Case("3", "3")
                .Default("0");
  }

  CmdArgs.push_back(Args.MakeArgString("-O" + Level));
}
static bool checkSystemForAMDGPU(const ArgList &Args, const AMDGPUToolChain &TC, | static bool checkSystemForAMDGPU(const ArgList &Args, const AMDGPUToolChain &TC, | ||||
std::string &GPUArch) { | std::string &GPUArch) { | ||||
if (auto Err = TC.getSystemGPUArch(Args, GPUArch)) { | if (auto Err = TC.getSystemGPUArch(Args, GPUArch)) { | ||||
std::string ErrMsg = | std::string ErrMsg = | ||||
llvm::formatv("{0}", llvm::fmt_consume(std::move(Err))); | llvm::formatv("{0}", llvm::fmt_consume(std::move(Err))); | ||||
TC.getDriver().Diag(diag::err_drv_undetermined_amdgpu_arch) << ErrMsg; | TC.getDriver().Diag(diag::err_drv_undetermined_amdgpu_arch) << ErrMsg; | ||||
return false; | return false; | ||||
} | } | ||||
return true; | return true; | ||||
} | } | ||||
} // namespace | } // namespace | ||||
const char *AMDGCN::OpenMPLinker::constructLLVMLinkCommand( | |||||
const toolchains::AMDGPUOpenMPToolChain &AMDGPUOpenMPTC, Compilation &C, | |||||
const JobAction &JA, const InputInfoList &Inputs, const ArgList &Args, | |||||
StringRef SubArchName, StringRef OutputFilePrefix) const { | |||||
ArgStringList CmdArgs; | |||||
for (const auto &II : Inputs) | |||||
if (II.isFilename()) | |||||
CmdArgs.push_back(II.getFilename()); | |||||
bool HasLibm = false; | |||||
if (Args.hasArg(options::OPT_l)) { | |||||
auto Lm = Args.getAllArgValues(options::OPT_l); | |||||
for (auto &Lib : Lm) { | |||||
if (Lib == "m") { | |||||
HasLibm = true; | |||||
break; | |||||
} | |||||
} | |||||
if (HasLibm) { | |||||
// This is not certain to work. The device libs added here, and passed to | |||||
// llvm-link, are missing attributes that they expect to be inserted when | |||||
// passed to mlink-builtin-bitcode. The amdgpu backend does not generate | |||||
// conservatively correct code when attributes are missing, so this may | |||||
// be the root cause of miscompilations. Passing via mlink-builtin-bitcode | |||||
// ultimately hits CodeGenModule::addDefaultFunctionDefinitionAttributes | |||||
// on each function, see D28538 for context. | |||||
// Potential workarounds: | |||||
// - unconditionally link all of the device libs to every translation | |||||
// unit in clang via mlink-builtin-bitcode | |||||
// - build a libm bitcode file as part of the DeviceRTL and explictly | |||||
// mlink-builtin-bitcode the rocm device libs components at build time | |||||
// - drop this llvm-link fork in favour or some calls into LLVM, chosen | |||||
// to do basically the same work as llvm-link but with that call first | |||||
// - write an opt pass that sets that on every function it sees and pipe | |||||
// the device-libs bitcode through that on the way to this llvm-link | |||||
SmallVector<std::string, 12> BCLibs = | |||||
AMDGPUOpenMPTC.getCommonDeviceLibNames(Args, SubArchName.str()); | |||||
for (StringRef BCFile : BCLibs) | |||||
CmdArgs.push_back(Args.MakeArgString(BCFile)); | |||||
} | |||||
} | |||||
AddStaticDeviceLibsLinking(C, *this, JA, Inputs, Args, CmdArgs, "amdgcn", | |||||
SubArchName, /*isBitCodeSDL=*/true, | |||||
/*postClangLink=*/false); | |||||
// Add an intermediate output file. | |||||
CmdArgs.push_back("-o"); | |||||
const char *OutputFileName = | |||||
getOutputFileName(C, OutputFilePrefix, "-linked", "bc"); | |||||
CmdArgs.push_back(OutputFileName); | |||||
const char *Exec = | |||||
Args.MakeArgString(getToolChain().GetProgramPath("llvm-link")); | |||||
C.addCommand(std::make_unique<Command>( | |||||
JA, *this, ResponseFileSupport::AtFileCurCP(), Exec, CmdArgs, Inputs, | |||||
InputInfo(&JA, Args.MakeArgString(OutputFileName)))); | |||||
// If we linked in libm definitions late we run another round of optimizations | |||||
// to inline the definitions and fold what is foldable. | |||||
if (HasLibm) { | |||||
ArgStringList OptCmdArgs; | |||||
const char *OptOutputFileName = | |||||
getOutputFileName(C, OutputFilePrefix, "-linked-opt", "bc"); | |||||
addLLCOptArg(Args, OptCmdArgs); | |||||
OptCmdArgs.push_back(OutputFileName); | |||||
OptCmdArgs.push_back("-o"); | |||||
OptCmdArgs.push_back(OptOutputFileName); | |||||
const char *OptExec = | |||||
Args.MakeArgString(getToolChain().GetProgramPath("opt")); | |||||
C.addCommand(std::make_unique<Command>( | |||||
JA, *this, ResponseFileSupport::AtFileCurCP(), OptExec, OptCmdArgs, | |||||
InputInfo(&JA, Args.MakeArgString(OutputFileName)), | |||||
InputInfo(&JA, Args.MakeArgString(OptOutputFileName)))); | |||||
OutputFileName = OptOutputFileName; | |||||
} | |||||
return OutputFileName; | |||||
} | |||||
/// Queue an llc job that compiles \p InputFileName for the
/// amdgcn-amd-amdhsa triple.
///
/// \param C                compilation the job is added to.
/// \param JA               action the job is created for.
/// \param Inputs           inputs recorded on the queued command.
/// \param Args             driver arguments; the -O level and every -mllvm
///                         value are forwarded to llc.
/// \param SubArchName      GPU architecture, passed as -mcpu.
/// \param OutputFilePrefix prefix for the output file name.
/// \param InputFileName    file handed to llc as its input.
/// \param OutputIsAsm      emit assembly (".s") instead of an object (".o").
/// \returns the name of the file llc writes.
const char *AMDGCN::OpenMPLinker::constructLlcCommand(
    Compilation &C, const JobAction &JA, const InputInfoList &Inputs,
    const llvm::opt::ArgList &Args, llvm::StringRef SubArchName,
    llvm::StringRef OutputFilePrefix, const char *InputFileName,
    bool OutputIsAsm) const {
  ArgStringList LlcCmdArgs;

  // Input file first, then the optimization level.
  LlcCmdArgs.push_back(InputFileName);
  addLLCOptArg(Args, LlcCmdArgs);

  // Target selection and output kind.
  LlcCmdArgs.push_back("-mtriple=amdgcn-amd-amdhsa");
  LlcCmdArgs.push_back(Args.MakeArgString("-mcpu=" + SubArchName));
  const char *FileType = OutputIsAsm ? "asm" : "obj";
  LlcCmdArgs.push_back(Args.MakeArgString(Twine("-filetype=") + FileType));

  // Forward every -mllvm value unchanged.
  for (const Arg *MLLVMArg : Args.filtered(options::OPT_mllvm))
    LlcCmdArgs.push_back(MLLVMArg->getValue(0));

  // Output file: assembly or object, depending on OutputIsAsm.
  const char *Extension = OutputIsAsm ? "s" : "o";
  const char *ResultFile =
      getOutputFileName(C, OutputFilePrefix, "", Extension);
  LlcCmdArgs.push_back("-o");
  LlcCmdArgs.push_back(ResultFile);

  const char *LlcExec =
      Args.MakeArgString(getToolChain().GetProgramPath("llc"));
  C.addCommand(std::make_unique<Command>(
      JA, *this, ResponseFileSupport::AtFileCurCP(), LlcExec, LlcCmdArgs,
      Inputs, InputInfo(&JA, Args.MakeArgString(ResultFile))));
  return ResultFile;
}
/// Queue the lld job (GNU flavor, shared output, undefined symbols rejected)
/// that links \p InputFileName into \p Output. The output from ld.lld is an
/// HSA code object file.
///
/// \param C             compilation the job is added to.
/// \param JA            action the job is created for.
/// \param Inputs        inputs recorded on the queued command.
/// \param Output        destination of the link; its file name follows -o.
/// \param Args          driver arguments, used to allocate argument strings.
/// \param InputFileName file passed to lld as its only input.
void AMDGCN::OpenMPLinker::constructLldCommand(
    Compilation &C, const JobAction &JA, const InputInfoList &Inputs,
    const InputInfo &Output, const llvm::opt::ArgList &Args,
    const char *InputFileName) const {
  ArgStringList LinkerArgs;
  LinkerArgs.push_back("-flavor");
  LinkerArgs.push_back("gnu");
  LinkerArgs.push_back("--no-undefined");
  LinkerArgs.push_back("-shared");
  LinkerArgs.push_back("-o");
  LinkerArgs.push_back(Output.getFilename());
  LinkerArgs.push_back(InputFileName);

  const char *LldExec =
      Args.MakeArgString(getToolChain().GetProgramPath("lld"));
  C.addCommand(std::make_unique<Command>(
      JA, *this, ResponseFileSupport::AtFileCurCP(), LldExec, LinkerArgs,
      Inputs, InputInfo(&JA, Args.MakeArgString(Output.getFilename()))));
}
/// For amdgcn the linker job takes device bitcode as input and produces an
/// object file. It queues llvm-link (plus opt when libm is linked in), then
/// llc, then lld.
///
/// The GPU architecture comes from -march=; when that is empty the system is
/// queried instead, and no jobs are queued if that query fails.
/// \p LinkingOutput is not used here.
void AMDGCN::OpenMPLinker::ConstructJob(Compilation &C, const JobAction &JA,
                                        const InputInfo &Output,
                                        const InputInfoList &Inputs,
                                        const ArgList &Args,
                                        const char *LinkingOutput) const {
  const ToolChain &TC = getToolChain();
  assert(getToolChain().getTriple().isAMDGCN() && "Unsupported target");

  const auto &AMDGPUOpenMPTC =
      static_cast<const toolchains::AMDGPUOpenMPToolChain &>(TC);

  std::string GPUArch = Args.getLastArgValue(options::OPT_march_EQ).str();
  if (GPUArch.empty() && !checkSystemForAMDGPU(Args, AMDGPUOpenMPTC, GPUArch))
    return;

  // Intermediate files are named "<stem>-<arch>", where the stem comes from
  // the last input that is a file.
  std::string Prefix;
  for (const InputInfo &Input : Inputs) {
    if (!Input.isFilename())
      continue;
    Prefix = llvm::sys::path::stem(Input.getFilename()).str() + "-" + GPUArch;
  }
  assert(Prefix.length() && "no linker inputs are files ");

  // Each step writes its own file; the returned names chain the steps.
  const char *LinkedBitcode = constructLLVMLinkCommand(
      AMDGPUOpenMPTC, C, JA, Inputs, Args, GPUArch, Prefix);

  // With save-temps, also emit readable assembly next to the object.
  if (C.getDriver().isSaveTempsEnabled())
    constructLlcCommand(C, JA, Inputs, Args, GPUArch, Prefix, LinkedBitcode,
                        /*OutputIsAsm=*/true);

  const char *ObjectFile =
      constructLlcCommand(C, JA, Inputs, Args, GPUArch, Prefix, LinkedBitcode);
  constructLldCommand(C, JA, Inputs, Output, Args, ObjectFile);
}
AMDGPUOpenMPToolChain::AMDGPUOpenMPToolChain(const Driver &D, | AMDGPUOpenMPToolChain::AMDGPUOpenMPToolChain(const Driver &D, | ||||
const llvm::Triple &Triple, | const llvm::Triple &Triple, | ||||
const ToolChain &HostTC, | const ToolChain &HostTC, | ||||
const ArgList &Args) | const ArgList &Args) | ||||
: ROCMToolChain(D, Triple, Args), HostTC(HostTC) { | : ROCMToolChain(D, Triple, Args), HostTC(HostTC) { | ||||
// Lookup binaries into the driver directory, this is used to | // Lookup binaries into the driver directory, this is used to | ||||
// discover the clang-offload-bundler executable. | // discover the clang-offload-bundler executable. | ||||
getProgramPaths().push_back(getDriver().Dir); | getProgramPaths().push_back(getDriver().Dir); | ||||
▲ Show 20 Lines • Show All 60 Lines • ▼ Show 20 Lines | if (!BoundArch.empty()) { | ||||
DAL->eraseArg(options::OPT_march_EQ); | DAL->eraseArg(options::OPT_march_EQ); | ||||
DAL->AddJoinedArg(nullptr, Opts.getOption(options::OPT_march_EQ), | DAL->AddJoinedArg(nullptr, Opts.getOption(options::OPT_march_EQ), | ||||
BoundArch); | BoundArch); | ||||
} | } | ||||
return DAL; | return DAL; | ||||
} | } | ||||
/// Create the linker tool for this toolchain: a newly allocated AMDGCN OpenMP
/// linker bound to this toolchain. Only AMDGCN triples are expected.
Tool *AMDGPUOpenMPToolChain::buildLinker() const {
  assert(getTriple().isAMDGCN());
  return new tools::AMDGCN::OpenMPLinker(*this);
}
void AMDGPUOpenMPToolChain::addClangWarningOptions( | void AMDGPUOpenMPToolChain::addClangWarningOptions( | ||||
ArgStringList &CC1Args) const { | ArgStringList &CC1Args) const { | ||||
HostTC.addClangWarningOptions(CC1Args); | HostTC.addClangWarningOptions(CC1Args); | ||||
} | } | ||||
ToolChain::CXXStdlibType | ToolChain::CXXStdlibType | ||||
AMDGPUOpenMPToolChain::GetCXXStdlibType(const ArgList &Args) const { | AMDGPUOpenMPToolChain::GetCXXStdlibType(const ArgList &Args) const { | ||||
return HostTC.GetCXXStdlibType(Args); | return HostTC.GetCXXStdlibType(Args); | ||||
Show All 30 Lines |