Summary of this patch (text before the first "diff --git" line is ignored by
patch tools):

  * Clang.cpp: the driver collects target features with getTargetFeatures(),
    drops entries that start with "-target", and, when offload LTO is enabled,
    appends each one as "feature=<f>" to the "--image=" argument. The
    "-target-feature=<triple>=<features>" argument built in the LTO block is
    removed.
  * ClangLinkerWrapper.cpp: the "target-feature" option is removed.
    getTargetFeatures() now reads the "feature" string of every input file,
    splits it on ",", walks the pieces from last to first and keeps the first
    piece seen for each name (the name is the piece without its leading
    character). The result is handed to createLTO() and stored in Conf.MAttrs.
  * ClangOffloadPackager.cpp: when a key occurs more than once in one image
    string, the values are joined with "," instead of the first one winning.
  * openmp-offload-gpu-new.c: checks that a "feature=+ptx<N>" entry is emitted.

NOTE(review): line breaks, indentation and blank context lines were rebuilt
from the hunk line counts and LLVM formatting conventions, and the template
arguments were restored from how each value is used. The inline size "1024" on
the BinaryData context line cannot be derived from the patch - confirm it
against the target tree. If a hunk is rejected, retry with
"git apply --ignore-whitespace".
NOTE(review): the driver hunk passes "Args" to getTargetFeatures() while the
neighbouring context line reads from "TCArgs" - confirm which is intended.
NOTE(review): getTargetFeatures() in the linker wrapper calls drop_front() on
every split piece and returns the features in reverse input order - confirm
that an empty piece cannot reach it and that the order is irrelevant.

diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp
--- a/clang/lib/Driver/ToolChains/Clang.cpp
+++ b/clang/lib/Driver/ToolChains/Clang.cpp
@@ -8303,11 +8303,27 @@
     StringRef Arch = (OffloadAction->getOffloadingArch())
                          ? OffloadAction->getOffloadingArch()
                          : TCArgs.getLastArgValue(options::OPT_march_EQ);
+    StringRef Kind =
+        Action::GetOffloadKindName(OffloadAction->getOffloadingDeviceKind());
+
+    ArgStringList Features;
+    SmallVector<StringRef> FeatureArgs;
+    getTargetFeatures(TC->getDriver(), TC->getTriple(), Args, Features, false);
+    llvm::copy_if(Features, std::back_inserter(FeatureArgs),
+                  [](StringRef Arg) { return !Arg.startswith("-target"); });
+
+    SmallVector<std::string> Parts{
+        "file=" + File.str(),
+        "triple=" + TC->getTripleString(),
+        "arch=" + Arch.str(),
+        "kind=" + Kind.str(),
+    };
 
-    CmdArgs.push_back(Args.MakeArgString(
-        "--image=file=" + File + "," + "triple=" + TC->getTripleString() + "," +
-        "arch=" + Arch + "," + "kind=" +
-        Action::GetOffloadKindName(OffloadAction->getOffloadingDeviceKind())));
+    if (TC->getDriver().isUsingLTO(/* IsOffload */ true))
+      for (StringRef Feature : FeatureArgs)
+        Parts.emplace_back("feature=" + Feature.str());
+
+    CmdArgs.push_back(Args.MakeArgString("--image=" + llvm::join(Parts, ",")));
   }
 
   C.addCommand(std::make_unique<Command>(
@@ -8365,20 +8381,6 @@
   }
 
   if (D.isUsingLTO(/* IsOffload */ true)) {
-    // Pass in target features for each toolchain.
-    for (auto &I :
-         llvm::make_range(OpenMPTCRange.first, OpenMPTCRange.second)) {
-      const ToolChain *TC = I.second;
-      const ArgList &TCArgs = C.getArgsForToolChain(TC, "", Action::OFK_OpenMP);
-      ArgStringList FeatureArgs;
-      TC->addClangTargetOptions(TCArgs, FeatureArgs, Action::OFK_OpenMP);
-      auto FeatureIt = llvm::find(FeatureArgs, "-target-feature");
-      if (FeatureIt != FeatureArgs.end())
-        CmdArgs.push_back(
-            Args.MakeArgString("-target-feature=" + TC->getTripleString() +
-                               "=" + *(FeatureIt + 1)));
-    }
-
     // Pass in the optimization level to use for LTO.
     if (const Arg *A = Args.getLastArg(options::OPT_O_Group)) {
       StringRef OOpt;
diff --git a/clang/test/Driver/openmp-offload-gpu-new.c b/clang/test/Driver/openmp-offload-gpu-new.c
--- a/clang/test/Driver/openmp-offload-gpu-new.c
+++ b/clang/test/Driver/openmp-offload-gpu-new.c
@@ -115,3 +115,8 @@
 // RUN: %s 2>&1 | FileCheck --check-prefix=CHECK-XLINKER %s
 
 // CHECK-XLINKER: -device-linker=a{{.*}}-device-linker=nvptx64-nvidia-cuda=b{{.*}}-device-linker=nvptx64-nvidia-cuda=c{{.*}}--
+
+// RUN: %clang -### --target=x86_64-unknown-linux-gnu -fopenmp --offload-arch=sm_52 -nogpulib \
+// RUN: -foffload-lto %s 2>&1 | FileCheck --check-prefix=CHECK-LTO-FEATURES %s
+
+// CHECK-LTO-FEATURES: clang-offload-packager{{.*}}--image={{.*}}feature=+ptx{{[0-9]+}}
diff --git a/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp b/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp
--- a/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp
+++ b/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp
@@ -68,10 +68,6 @@
                                            cl::desc("Path of linker binary"),
                                            cl::cat(ClangLinkerWrapperCategory));
 
-static cl::opt<std::string>
-    TargetFeatures("target-feature", cl::desc("Target features for triple"),
-                   cl::cat(ClangLinkerWrapperCategory));
-
 static cl::opt<std::string> OptLevel("opt-level",
                                      cl::desc("Optimization level for LTO"),
                                      cl::init("O2"),
@@ -726,16 +722,24 @@
   }
 }
 
-// Get the target features passed in from the driver as <triple>=<features>.
-std::vector<std::string> getTargetFeatures(const Triple &TheTriple) {
-  std::vector<std::string> Features;
-  auto TargetAndFeatures = StringRef(TargetFeatures).split('=');
-  if (TargetAndFeatures.first != TheTriple.getTriple())
-    return Features;
+// Get the list of target features from the input file and unify them such that
+// if there are multiple +xxx or -xxx features we only keep the last one.
+std::vector<std::string> getTargetFeatures(ArrayRef<OffloadFile> InputFiles) {
+  SmallVector<StringRef> Features;
+  for (const OffloadFile &File : InputFiles) {
+    for (auto Arg : llvm::split(File.getBinary()->getString("feature"), ","))
+      Features.emplace_back(Arg);
+  }
+
+  // Only add a feature if it hasn't been seen before starting from the end.
+  std::vector<std::string> UnifiedFeatures;
+  DenseSet<StringRef> UsedFeatures;
+  for (StringRef Feature : llvm::reverse(Features)) {
+    if (UsedFeatures.insert(Feature.drop_front()).second)
+      UnifiedFeatures.push_back(Feature.str());
+  }
 
-  for (auto Feature : llvm::split(TargetAndFeatures.second, ','))
-    Features.push_back(Feature.str());
-  return Features;
+  return UnifiedFeatures;
 }
 
 CodeGenOpt::Level getCGOptLevel(unsigned OptLevel) {
@@ -755,6 +759,7 @@
 template <typename ModuleHook = function_ref<bool(size_t, const Module &)>>
 std::unique_ptr<lto::LTO> createLTO(
     const Triple &TheTriple, StringRef Arch, bool WholeProgram,
+    const std::vector<std::string> &Features,
     ModuleHook Hook = [](size_t, const Module &) { return true; }) {
   lto::Config Conf;
   lto::ThinBackend Backend;
@@ -765,7 +770,7 @@
   Conf.CPU = Arch.str();
   Conf.Options = codegen::InitTargetOptionsFromCodeGenFlags(TheTriple);
 
-  Conf.MAttrs = getTargetFeatures(TheTriple);
+  Conf.MAttrs = Features;
   Conf.CGOptLevel = getCGOptLevel(OptLevel[1] - '0');
   Conf.OptLevel = OptLevel[1] - '0';
   if (Conf.OptLevel > 0)
@@ -902,10 +907,12 @@
   };
 
   // We assume visibility of the whole program if every input file was bitcode.
+  auto Features = getTargetFeatures(BitcodeInputFiles);
   bool WholeProgram = InputFiles.empty();
   auto LTOBackend =
-      (EmbedBitcode) ? createLTO(TheTriple, Arch, WholeProgram, OutputBitcode)
-                     : createLTO(TheTriple, Arch, WholeProgram);
+      (EmbedBitcode)
+          ? createLTO(TheTriple, Arch, WholeProgram, Features, OutputBitcode)
+          : createLTO(TheTriple, Arch, WholeProgram, Features);
 
   // We need to resolve the symbols so the LTO backend knows which symbols need
   // to be kept or can be internalized. This is a simplified symbol resolution
diff --git a/clang/tools/clang-offload-packager/ClangOffloadPackager.cpp b/clang/tools/clang-offload-packager/ClangOffloadPackager.cpp
--- a/clang/tools/clang-offload-packager/ClangOffloadPackager.cpp
+++ b/clang/tools/clang-offload-packager/ClangOffloadPackager.cpp
@@ -22,6 +22,7 @@
 #include "llvm/Support/MemoryBuffer.h"
 #include "llvm/Support/Path.h"
 #include "llvm/Support/Signals.h"
+#include "llvm/Support/StringSaver.h"
 #include "llvm/Support/WithColor.h"
 
 using namespace llvm;
@@ -71,9 +72,18 @@
   SmallVector<char, 1024> BinaryData;
   raw_svector_ostream OS(BinaryData);
   for (StringRef Image : DeviceImages) {
+    BumpPtrAllocator Alloc;
+    StringSaver Saver(Alloc);
+
     StringMap<StringRef> Args;
-    for (StringRef Arg : llvm::split(Image, ","))
-      Args.insert(Arg.split("="));
+    for (StringRef Arg : llvm::split(Image, ",")) {
+      auto KeyAndValue = Arg.split("=");
+      if (Args.count(KeyAndValue.first))
+        Args[KeyAndValue.first] =
+            Saver.save(Args[KeyAndValue.first] + "," + KeyAndValue.second);
+      else
+        Args[KeyAndValue.first] = KeyAndValue.second;
+    }
 
     if (!Args.count("triple") || !Args.count("file"))
       return reportError(createStringError(