Diff 532748

clang/lib/Driver/ToolChains/Clang.cpp

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 8,441 Lines • ▼ Show 20 Lines	for (const InputInfo &Input : Inputs) {

if (TC->getTriple().isAMDGPU()) {		if (TC->getTriple().isAMDGPU()) {
for (StringRef Feature : llvm::split(Arch.split(':').second, ':')) {		for (StringRef Feature : llvm::split(Arch.split(':').second, ':')) {
FeatureArgs.emplace_back(		FeatureArgs.emplace_back(
Args.MakeArgString(Feature.take_back() + Feature.drop_back()));		Args.MakeArgString(Feature.take_back() + Feature.drop_back()));
}		}
}		}

// TODO: We need to pass in the full target-id and handle it properly in the
// linker wrapper.
SmallVector<std::string> Parts{		SmallVector<std::string> Parts{
"file=" + File.str(),		"file=" + File.str(),
"triple=" + TC->getTripleString(),		"triple=" + TC->getTripleString(),
"arch=" + getProcessorFromTargetID(TC->getTriple(), Arch).str(),		"arch=" + Arch.str(),
"kind=" + Kind.str(),		"kind=" + Kind.str(),
};		};

if (TC->getDriver().isUsingLTO(/* IsOffload */ true) \|\|		if (TC->getDriver().isUsingLTO(/* IsOffload */ true) \|\|
TC->getTriple().isAMDGPU())		TC->getTriple().isAMDGPU())
for (StringRef Feature : FeatureArgs)		for (StringRef Feature : FeatureArgs)
Parts.emplace_back("feature=" + Feature.str());		Parts.emplace_back("feature=" + Feature.str());

▲ Show 20 Lines • Show All 124 Lines • Show Last 20 Lines

clang/test/Driver/amdgpu-openmp-toolchain.c

	Show First 20 Lines • Show All 59 Lines • ▼ Show 20 Lines

	// RUN: %clang -### -target x86_64-pc-linux-gnu -fopenmp --offload-arch=gfx803 -nogpulib \			// RUN: %clang -### -target x86_64-pc-linux-gnu -fopenmp --offload-arch=gfx803 -nogpulib \
	// RUN: --rocm-device-lib-path=%S/Inputs/rocm/amdgcn/bitcode -fopenmp-new-driver %s 2>&1 \| \			// RUN: --rocm-device-lib-path=%S/Inputs/rocm/amdgcn/bitcode -fopenmp-new-driver %s 2>&1 \| \
	// RUN: FileCheck %s --check-prefix=CHECK-LIB-DEVICE-NOGPULIB			// RUN: FileCheck %s --check-prefix=CHECK-LIB-DEVICE-NOGPULIB
	// CHECK-LIB-DEVICE-NOGPULIB-NOT: "-cc1" {{.}}ocml.bc"{{.}}ockl.bc"{{.}}oclc_daz_opt_on.bc"{{.}}oclc_unsafe_math_off.bc"{{.}}oclc_finite_only_off.bc"{{.}}oclc_correctly_rounded_sqrt_on.bc"{{.}}oclc_wavefrontsize64_on.bc"{{.}}oclc_isa_version_803.bc"			// CHECK-LIB-DEVICE-NOGPULIB-NOT: "-cc1" {{.}}ocml.bc"{{.}}ockl.bc"{{.}}oclc_daz_opt_on.bc"{{.}}oclc_unsafe_math_off.bc"{{.}}oclc_finite_only_off.bc"{{.}}oclc_correctly_rounded_sqrt_on.bc"{{.}}oclc_wavefrontsize64_on.bc"{{.}}oclc_isa_version_803.bc"

	// RUN: %clang -### -target x86_64-pc-linux-gnu -fopenmp --offload-arch=gfx90a:sramecc-:xnack+ \			// RUN: %clang -### -target x86_64-pc-linux-gnu -fopenmp --offload-arch=gfx90a:sramecc-:xnack+ \
	// RUN: -nogpulib %s 2>&1 \| FileCheck %s --check-prefix=CHECK-TARGET-ID			// RUN: -nogpulib %s 2>&1 \| FileCheck %s --check-prefix=CHECK-TARGET-ID
	// CHECK-TARGET-ID: clang-offload-packager{{.*}}arch=gfx90a,kind=openmp,feature=-sramecc,feature=+xnack			// CHECK-TARGET-ID: clang-offload-packager{{.*}}arch=gfx90a:sramecc-:xnack+,kind=openmp,feature=-sramecc,feature=+xnack

	// RUN: not %clang -### -target x86_64-pc-linux-gnu -fopenmp --offload-arch=gfx90a,gfx90a:xnack+ \			// RUN: not %clang -### -target x86_64-pc-linux-gnu -fopenmp --offload-arch=gfx90a,gfx90a:xnack+ \
	// RUN: -nogpulib %s 2>&1 \| FileCheck %s --check-prefix=CHECK-TARGET-ID-ERROR			// RUN: -nogpulib %s 2>&1 \| FileCheck %s --check-prefix=CHECK-TARGET-ID-ERROR
	// CHECK-TARGET-ID-ERROR: error: invalid offload arch combinations: 'gfx90a' and 'gfx90a:xnack+'			// CHECK-TARGET-ID-ERROR: error: invalid offload arch combinations: 'gfx90a' and 'gfx90a:xnack+'

clang/test/Driver/linker-wrapper.c

	// REQUIRES: x86-registered-target			// REQUIRES: x86-registered-target
	// REQUIRES: nvptx-registered-target			// REQUIRES: nvptx-registered-target
	// REQUIRES: amdgpu-registered-target			// REQUIRES: amdgpu-registered-target

				// An externally visible variable so static libraries extract.
				__attribute__((visibility("protected"), used)) int x;

	// RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t.elf.o			// RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t.elf.o
	// RUN: %clang -cc1 %s -triple nvptx64-nvidia-cuda -emit-llvm-bc -o %t.nvptx.bc			// RUN: %clang -cc1 %s -triple nvptx64-nvidia-cuda -emit-llvm-bc -o %t.nvptx.bc
	// RUN: %clang -cc1 %s -triple amdgcn-amd-amdhsa -emit-llvm-bc -o %t.amdgpu.bc			// RUN: %clang -cc1 %s -triple amdgcn-amd-amdhsa -emit-llvm-bc -o %t.amdgpu.bc

	// RUN: clang-offload-packager -o %t.out \			// RUN: clang-offload-packager -o %t.out \
	// RUN: --image=file=%t.elf.o,kind=openmp,triple=nvptx64-nvidia-cuda,arch=sm_70 \			// RUN: --image=file=%t.elf.o,kind=openmp,triple=nvptx64-nvidia-cuda,arch=sm_70 \
	// RUN: --image=file=%t.elf.o,kind=openmp,triple=nvptx64-nvidia-cuda,arch=sm_70			// RUN: --image=file=%t.elf.o,kind=openmp,triple=nvptx64-nvidia-cuda,arch=sm_70
	// RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t.o -fembed-offload-object=%t.out			// RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t.o -fembed-offload-object=%t.out
	Show All 18 Lines
	// RUN: --image=file=%t.elf.o,kind=openmp,triple=amdgcn-amd-amdhsa,arch=gfx908 \			// RUN: --image=file=%t.elf.o,kind=openmp,triple=amdgcn-amd-amdhsa,arch=gfx908 \
	// RUN: --image=file=%t.elf.o,kind=openmp,triple=amdgcn-amd-amdhsa,arch=gfx908			// RUN: --image=file=%t.elf.o,kind=openmp,triple=amdgcn-amd-amdhsa,arch=gfx908
	// RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t.o -fembed-offload-object=%t.out			// RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t.o -fembed-offload-object=%t.out
	// RUN: clang-linker-wrapper --host-triple=x86_64-unknown-linux-gnu --dry-run \			// RUN: clang-linker-wrapper --host-triple=x86_64-unknown-linux-gnu --dry-run \
	// RUN: --linker-path=/usr/bin/ld -- %t.o -o a.out 2>&1 \| FileCheck %s --check-prefix=AMDGPU-LINK			// RUN: --linker-path=/usr/bin/ld -- %t.o -o a.out 2>&1 \| FileCheck %s --check-prefix=AMDGPU-LINK

	// AMDGPU-LINK: clang{{.}} -o {{.}}.img --target=amdgcn-amd-amdhsa -mcpu=gfx908 -O2 -Wl,--no-undefined {{.}}.o {{.}}.o			// AMDGPU-LINK: clang{{.}} -o {{.}}.img --target=amdgcn-amd-amdhsa -mcpu=gfx908 -O2 -Wl,--no-undefined {{.}}.o {{.}}.o

				// RUN: clang-offload-packager -o %t-lib.out \
				// RUN: --image=file=%t.amdgpu.bc,kind=openmp,triple=amdgcn-amd-amdhsa,arch=gfx90a
				// RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t.o -fembed-offload-object=%t-lib.out
				// RUN: llvm-ar rcs %t.a %t.o
				// RUN: clang-offload-packager -o %t.out \
				// RUN: --image=file=%t.elf.o,kind=openmp,triple=amdgcn-amd-amdhsa,arch=gfx90a:xnack+
				// RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t.o -fembed-offload-object=%t.out
				// RUN: clang-linker-wrapper --host-triple=x86_64-unknown-linux-gnu --dry-run \
				// RUN: --linker-path=/usr/bin/ld -- %t.o %t.a -o a.out 2>&1 \| FileCheck %s --check-prefix=AMDGPU-LINK-ID

				yaxunl (inline comment, not done): Can we put some variables in the input bitcode so that we can check the linked bitcode? I would expect there to be only one linked bitcode for gfx90a:xnack+, and that it contains both variables. I don't think it is a good idea to let the final object embed bitcode for both gfx90a:xnack+ and gfx90a, since that will result in an invalid container. Therefore I think we should only do linking with the target IDs from the first container.
				jhuber6 (author, inline reply, done): I can make a static library that's `gfx90a`; that covers the main case where we still link in the OpenMP runtime library that's compiled with `90a` if the user uses `90a:xnack+`. I'd need to place a random external variable to force it to extract, however.
				// AMDGPU-LINK-ID: clang{{.}} -o {{.}}.img --target=amdgcn-amd-amdhsa -mcpu=gfx90a -O2 -Wl,--no-undefined {{.}}.o {{.}}.o

	// RUN: clang-offload-packager -o %t.out \			// RUN: clang-offload-packager -o %t.out \
	// RUN: --image=file=%t.amdgpu.bc,kind=openmp,triple=amdgcn-amd-amdhsa,arch=gfx1030 \			// RUN: --image=file=%t.amdgpu.bc,kind=openmp,triple=amdgcn-amd-amdhsa,arch=gfx1030 \
	// RUN: --image=file=%t.amdgpu.bc,kind=openmp,triple=amdgcn-amd-amdhsa,arch=gfx1030			// RUN: --image=file=%t.amdgpu.bc,kind=openmp,triple=amdgcn-amd-amdhsa,arch=gfx1030
	// RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t.o -fembed-offload-object=%t.out			// RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t.o -fembed-offload-object=%t.out
	// RUN: clang-linker-wrapper --host-triple=x86_64-unknown-linux-gnu --dry-run --save-temps -O2 \			// RUN: clang-linker-wrapper --host-triple=x86_64-unknown-linux-gnu --dry-run --save-temps -O2 \
	// RUN: --linker-path=/usr/bin/ld -- %t.o -o a.out 2>&1 \| FileCheck %s --check-prefix=AMDGPU-LTO-TEMPS			// RUN: --linker-path=/usr/bin/ld -- %t.o -o a.out 2>&1 \| FileCheck %s --check-prefix=AMDGPU-LTO-TEMPS

	// AMDGPU-LTO-TEMPS: clang{{.}} -o {{.}}.img --target=amdgcn-amd-amdhsa -mcpu=gfx1030 -O2 -Wl,--no-undefined {{.*}}.s -save-temps			// AMDGPU-LTO-TEMPS: clang{{.}} -o {{.}}.img --target=amdgcn-amd-amdhsa -mcpu=gfx1030 -O2 -Wl,--no-undefined {{.*}}.s -save-temps
	▲ Show 20 Lines • Show All 86 Lines • Show Last 20 Lines

clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp

Show First 20 Lines • Show All 963 Lines • ▼ Show 20 Lines

/// Returns a new ArgList containg arguments used for the device linking phase.		/// Returns a new ArgList containg arguments used for the device linking phase.
DerivedArgList getLinkerArgs(ArrayRef<OffloadFile> Input,		DerivedArgList getLinkerArgs(ArrayRef<OffloadFile> Input,
const InputArgList &Args) {		const InputArgList &Args) {
DerivedArgList DAL = DerivedArgList(DerivedArgList(Args));		DerivedArgList DAL = DerivedArgList(DerivedArgList(Args));
for (Arg *A : Args)		for (Arg *A : Args)
DAL.append(A);		DAL.append(A);

// Set the subarchitecture and target triple for this compilation.		// Set the subarchitecture and target triple for this compilation. The input
		// may be an AMDGPU target-id so we split off anything before the colon.
const OptTable &Tbl = getOptTable();		const OptTable &Tbl = getOptTable();
DAL.AddJoinedArg(nullptr, Tbl.getOption(OPT_arch_EQ),		DAL.AddJoinedArg(nullptr, Tbl.getOption(OPT_arch_EQ),
		Args.MakeArgString(
		Input.front().getBinary()->getArch().split(':').first));
		DAL.AddJoinedArg(nullptr, Tbl.getOption(OPT_full_arch_EQ),
Args.MakeArgString(Input.front().getBinary()->getArch()));		Args.MakeArgString(Input.front().getBinary()->getArch()));
DAL.AddJoinedArg(nullptr, Tbl.getOption(OPT_triple_EQ),		DAL.AddJoinedArg(nullptr, Tbl.getOption(OPT_triple_EQ),
Args.MakeArgString(Input.front().getBinary()->getTriple()));		Args.MakeArgString(Input.front().getBinary()->getTriple()));

// If every input file is bitcode we have whole program visibility as we do		// If every input file is bitcode we have whole program visibility as we do
// only support static linking with bitcode.		// only support static linking with bitcode.
auto ContainsBitcode = [](const OffloadFile &F) {		auto ContainsBitcode = [](const OffloadFile &F) {
return identify_magic(F.getBinary()->getImage()) == file_magic::bitcode;		return identify_magic(F.getBinary()->getImage()) == file_magic::bitcode;
Show All 13 Lines	DerivedArgList getLinkerArgs(ArrayRef<OffloadFile> Input,
}		}

return DAL;		return DAL;
}		}

/// Transforms all the extracted offloading input files into an image that can		/// Transforms all the extracted offloading input files into an image that can
/// be registered by the runtime.		/// be registered by the runtime.
Expected<SmallVector<StringRef>>		Expected<SmallVector<StringRef>>
linkAndWrapDeviceFiles(SmallVectorImpl<OffloadFile> &LinkerInputFiles,		linkAndWrapDeviceFiles(SmallVector<SmallVector<OffloadFile>> &LinkerInputFiles,
const InputArgList &Args, char **Argv, int Argc) {		const InputArgList &Args, char **Argv, int Argc) {
llvm::TimeTraceScope TimeScope("Handle all device input");		llvm::TimeTraceScope TimeScope("Handle all device input");

DenseMap<OffloadFile::TargetID, SmallVector<OffloadFile>> InputMap;
for (auto &File : LinkerInputFiles)
InputMap[File].emplace_back(std::move(File));
LinkerInputFiles.clear();

SmallVector<SmallVector<OffloadFile>> InputsForTarget;
for (auto &[ID, Input] : InputMap)
InputsForTarget.emplace_back(std::move(Input));
InputMap.clear();

std::mutex ImageMtx;		std::mutex ImageMtx;
DenseMap<OffloadKind, SmallVector<OffloadingImage>> Images;		DenseMap<OffloadKind, SmallVector<OffloadingImage>> Images;
auto Err = parallelForEachError(InputsForTarget, [&](auto &Input) -> Error {		auto Err = parallelForEachError(LinkerInputFiles, [&](auto &Input) -> Error {
llvm::TimeTraceScope TimeScope("Link device input");		llvm::TimeTraceScope TimeScope("Link device input");

// Each thread needs its own copy of the base arguments to maintain		// Each thread needs its own copy of the base arguments to maintain
// per-device argument storage of synthetic strings.		// per-device argument storage of synthetic strings.
const OptTable &Tbl = getOptTable();		const OptTable &Tbl = getOptTable();
BumpPtrAllocator Alloc;		BumpPtrAllocator Alloc;
StringSaver Saver(Alloc);		StringSaver Saver(Alloc);
auto BaseArgs =		auto BaseArgs =
▲ Show 20 Lines • Show All 41 Lines • ▼ Show 20 Lines	for (OffloadKind Kind : ActiveOffloadKinds) {
std::scoped_lock<decltype(ImageMtx)> Guard(ImageMtx);		std::scoped_lock<decltype(ImageMtx)> Guard(ImageMtx);
OffloadingImage TheImage{};		OffloadingImage TheImage{};
TheImage.TheImageKind =		TheImage.TheImageKind =
Args.hasArg(OPT_embed_bitcode) ? IMG_Bitcode : IMG_Object;		Args.hasArg(OPT_embed_bitcode) ? IMG_Bitcode : IMG_Object;
TheImage.TheOffloadKind = Kind;		TheImage.TheOffloadKind = Kind;
TheImage.StringData["triple"] =		TheImage.StringData["triple"] =
Args.MakeArgString(LinkerArgs.getLastArgValue(OPT_triple_EQ));		Args.MakeArgString(LinkerArgs.getLastArgValue(OPT_triple_EQ));
TheImage.StringData["arch"] =		TheImage.StringData["arch"] =
Args.MakeArgString(LinkerArgs.getLastArgValue(OPT_arch_EQ));		Args.MakeArgString(LinkerArgs.getLastArgValue(OPT_full_arch_EQ));
TheImage.Image = std::move(*FileOrErr);		TheImage.Image = std::move(*FileOrErr);

Images[Kind].emplace_back(std::move(TheImage));		Images[Kind].emplace_back(std::move(TheImage));
}		}
return Error::success();		return Error::success();
});		});
if (Err)		if (Err)
return std::move(Err);		return std::move(Err);
▲ Show 20 Lines • Show All 202 Lines • ▼ Show 20 Lines	default:
return false;		return false;
}		}
}		}

/// Search the input files and libraries for embedded device offloading code		/// Search the input files and libraries for embedded device offloading code
/// and add it to the list of files to be linked. Files coming from static		/// and add it to the list of files to be linked. Files coming from static
/// libraries are only added to the input if they are used by an existing		/// libraries are only added to the input if they are used by an existing
/// input file.		/// input file.
Expected<SmallVector<OffloadFile>> getDeviceInput(const ArgList &Args) {		Expected<SmallVector<SmallVector<OffloadFile>>>
		getDeviceInput(const ArgList &Args) {
llvm::TimeTraceScope TimeScope("ExtractDeviceCode");		llvm::TimeTraceScope TimeScope("ExtractDeviceCode");

StringRef Root = Args.getLastArgValue(OPT_sysroot_EQ);		StringRef Root = Args.getLastArgValue(OPT_sysroot_EQ);
SmallVector<StringRef> LibraryPaths;		SmallVector<StringRef> LibraryPaths;
for (const opt::Arg *Arg : Args.filtered(OPT_library_path))		for (const opt::Arg *Arg : Args.filtered(OPT_library_path))
LibraryPaths.push_back(Arg->getValue());		LibraryPaths.push_back(Arg->getValue());

BumpPtrAllocator Alloc;		BumpPtrAllocator Alloc;
StringSaver Saver(Alloc);		StringSaver Saver(Alloc);

// Try to extract device code from the linker input files.		// Try to extract device code from the linker input files.
SmallVector<OffloadFile> InputFiles;		DenseMap<OffloadFile::TargetID, SmallVector<OffloadFile>> InputMap;
DenseMap<OffloadFile::TargetID, DenseMap<StringRef, Symbol>> Syms;		DenseMap<OffloadFile::TargetID, DenseMap<StringRef, Symbol>> Syms;
bool WholeArchive = false;		bool WholeArchive = false;
for (const opt::Arg *Arg : Args.filtered(		for (const opt::Arg *Arg : Args.filtered(
OPT_INPUT, OPT_library, OPT_whole_archive, OPT_no_whole_archive)) {		OPT_INPUT, OPT_library, OPT_whole_archive, OPT_no_whole_archive)) {
if (Arg->getOption().matches(OPT_whole_archive) \|\|		if (Arg->getOption().matches(OPT_whole_archive) \|\|
Arg->getOption().matches(OPT_no_whole_archive)) {		Arg->getOption().matches(OPT_no_whole_archive)) {
WholeArchive = Arg->getOption().matches(OPT_whole_archive);		WholeArchive = Arg->getOption().matches(OPT_whole_archive);
continue;		continue;
Show All 30 Lines	for (const opt::Arg *Arg : Args.filtered(
bool IsArchive = identify_magic(Buffer.getBuffer()) == file_magic::archive;		bool IsArchive = identify_magic(Buffer.getBuffer()) == file_magic::archive;
bool Extracted = true;		bool Extracted = true;
while (Extracted) {		while (Extracted) {
Extracted = false;		Extracted = false;
for (OffloadFile &Binary : Binaries) {		for (OffloadFile &Binary : Binaries) {
if (!Binary.getBinary())		if (!Binary.getBinary())
continue;		continue;

		// Initialize the map with an empty set of inputs.
		OffloadFile::TargetID BinaryID =
		OffloadFile::TargetID(Saver.save(Binary.getBinary()->getTriple()),
		Saver.save(Binary.getBinary()->getArch()));
		if (!InputMap.count(BinaryID))
		InputMap[BinaryID] = SmallVector<OffloadFile>();

		// We need to compare this binary input with every input architecture
		// and copy it in if it's compatible. This allows a single binary to
		// participate in multiple link jobs.
		DenseMap<OffloadFile::TargetID, SmallVector<OffloadFile>> NewInputMap;
		for (const auto &[ID, Input] : InputMap) {
// If we don't have an object file for this architecture do not		// If we don't have an object file for this architecture do not
// extract.		// extract.
if (IsArchive && !WholeArchive && !Syms.count(Binary))		if (IsArchive && !WholeArchive && Input.empty())
		continue;

		// We only add the input if the binary is compatible with the slot.
		if (!areTargetsCompatible(Binary, ID))
continue;		continue;

Expected<bool> ExtractOrErr =		Expected<bool> ExtractOrErr = getSymbols(
getSymbols(Binary.getBinary()->getImage(),		Binary.getBinary()->getImage(),
Binary.getBinary()->getOffloadKind(), IsArchive, Saver,		Binary.getBinary()->getOffloadKind(), IsArchive, Saver, Syms[ID]);
Syms[Binary]);
if (!ExtractOrErr)		if (!ExtractOrErr)
return ExtractOrErr.takeError();		return ExtractOrErr.takeError();

Extracted = !WholeArchive && *ExtractOrErr;		Extracted = !WholeArchive && *ExtractOrErr;

if (!IsArchive \|\| WholeArchive \|\| Extracted)		if (!IsArchive \|\| WholeArchive \|\| Extracted) {
InputFiles.emplace_back(std::move(Binary));		auto NewBinaryOrErr = Binary.copy();
		if (!NewBinaryOrErr)
		return NewBinaryOrErr.takeError();
		NewInputMap[ID].emplace_back(std::move(*NewBinaryOrErr));
		}
		}

		for (auto &[NewID, NewInput] : NewInputMap)
		InputMap[NewID].append(std::make_move_iterator(NewInput.begin()),
		std::make_move_iterator(NewInput.end()));

		Binary.takeBinary();
// If we extracted any files we need to check all the symbols again.		// If we extracted any files we need to check all the symbols again.
if (Extracted)		if (Extracted)
break;		break;
}		}
}		}
}		}

for (StringRef Library : Args.getAllArgValues(OPT_bitcode_library_EQ)) {		SmallVector<SmallVector<OffloadFile>> InputFiles;
auto FileOrErr = getInputBitcodeLibrary(Library);		for (auto &[ID, Input] : InputMap)
if (!FileOrErr)		if (!Input.empty())
return FileOrErr.takeError();		InputFiles.emplace_back(std::move(Input));
InputFiles.push_back(std::move(*FileOrErr));		InputMap.clear();
}

return std::move(InputFiles);		return std::move(InputFiles);
}		}

} // namespace		} // namespace

int main(int Argc, char **Argv) {		int main(int Argc, char **Argv) {
InitLLVM X(Argc, Argv);		InitLLVM X(Argc, Argv);
▲ Show 20 Lines • Show All 96 Lines • Show Last 20 Lines

clang/tools/clang-linker-wrapper/LinkerWrapperOpts.td

	Show First 20 Lines • Show All 61 Lines • ▼ Show 20 Lines
	def wrapper_jobs : Joined<["--"], "wrapper-jobs=">,			def wrapper_jobs : Joined<["--"], "wrapper-jobs=">,
	Flags<[WrapperOnlyOption]>, MetaVarName<"<number>">,			Flags<[WrapperOnlyOption]>, MetaVarName<"<number>">,
	HelpText<"Sets the number of parallel jobs to use for device linking">;			HelpText<"Sets the number of parallel jobs to use for device linking">;

	// Flags passed to the device linker.			// Flags passed to the device linker.
	def arch_EQ : Joined<["--"], "arch=">,			def arch_EQ : Joined<["--"], "arch=">,
	Flags<[DeviceOnlyOption, HelpHidden]>, MetaVarName<"<arch>">,			Flags<[DeviceOnlyOption, HelpHidden]>, MetaVarName<"<arch>">,
	HelpText<"The device subarchitecture">;			HelpText<"The device subarchitecture">;
				// Device-only, hidden option that carries the complete architecture string.
				// For AMDGPU this is the whole target ID (processor plus any feature
				// settings), whereas --arch= receives only the part before the first colon.
				def full_arch_EQ : Joined<["--"], "full-arch=">,
				Flags<[DeviceOnlyOption, HelpHidden]>, MetaVarName<"<arch>">,
				HelpText<"The fully qualified device subarchitecture for AMD's target ID">;
	def triple_EQ : Joined<["--"], "triple=">,			def triple_EQ : Joined<["--"], "triple=">,
	Flags<[DeviceOnlyOption, HelpHidden]>, MetaVarName<"<triple>">,			Flags<[DeviceOnlyOption, HelpHidden]>, MetaVarName<"<triple>">,
	HelpText<"The device target triple">;			HelpText<"The device target triple">;
	def whole_program : Flag<["--"], "whole-program">,			def whole_program : Flag<["--"], "whole-program">,
	Flags<[DeviceOnlyOption, HelpHidden]>,			Flags<[DeviceOnlyOption, HelpHidden]>,
	HelpText<"LTO has visibility of all input files">;			HelpText<"LTO has visibility of all input files">;
	def linker_arg_EQ : Joined<["--"], "linker-arg=">,			def linker_arg_EQ : Joined<["--"], "linker-arg=">,
	Flags<[DeviceOnlyOption, HelpHidden]>,			Flags<[DeviceOnlyOption, HelpHidden]>,
	▲ Show 20 Lines • Show All 44 Lines • Show Last 20 Lines

llvm/include/llvm/Object/OffloadBinary.h

Show All 11 Lines
// thin wrapper around the image itself. If this format becomes sufficiently		// thin wrapper around the image itself. If this format becomes sufficiently
// complex it should be moved to a standard binary format like msgpack or ELF.		// complex it should be moved to a standard binary format like msgpack or ELF.
//		//
//===----------------------------------------------------------------------===//		//===----------------------------------------------------------------------===//

#ifndef LLVM_OBJECT_OFFLOADBINARY_H		#ifndef LLVM_OBJECT_OFFLOADBINARY_H
#define LLVM_OBJECT_OFFLOADBINARY_H		#define LLVM_OBJECT_OFFLOADBINARY_H

		#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/MapVector.h"		#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/StringRef.h"		#include "llvm/ADT/StringRef.h"
#include "llvm/Object/Binary.h"		#include "llvm/Object/Binary.h"
#include "llvm/Support/Error.h"		#include "llvm/Support/Error.h"
#include "llvm/Support/MemoryBuffer.h"		#include "llvm/Support/MemoryBuffer.h"
#include <memory>		#include <memory>

namespace llvm {		namespace llvm {
▲ Show 20 Lines • Show All 122 Lines • ▼ Show 20 Lines	private:
/// Location of the metadata entries within the binary.		/// Location of the metadata entries within the binary.
const Entry *TheEntry;		const Entry *TheEntry;
};		};

/// A class to contain the binary information for a single OffloadBinary that		/// A class to contain the binary information for a single OffloadBinary that
/// owns its memory.		/// owns its memory.
class OffloadFile : public OwningBinary<OffloadBinary> {		class OffloadFile : public OwningBinary<OffloadBinary> {
public:		public:
		/// An ordered pair of the target triple and the architecture.
using TargetID = std::pair<StringRef, StringRef>;		using TargetID = std::pair<StringRef, StringRef>;

OffloadFile(std::unique_ptr<OffloadBinary> Binary,		OffloadFile(std::unique_ptr<OffloadBinary> Binary,
std::unique_ptr<MemoryBuffer> Buffer)		std::unique_ptr<MemoryBuffer> Buffer)
: OwningBinary<OffloadBinary>(std::move(Binary), std::move(Buffer)) {}		: OwningBinary<OffloadBinary>(std::move(Binary), std::move(Buffer)) {}

		/// Produce an independent duplicate of this file. The bytes backing the
		/// current binary are copied into a fresh memory buffer and a new
		/// OffloadBinary is created over that copy, so the result owns its own
		/// storage. Any error reported while creating the new binary is returned.
		Expected<OffloadFile> copy() const {
		  StringRef Contents = getBinary()->getMemoryBufferRef().getBuffer();
		  std::unique_ptr<MemoryBuffer> Storage =
		      MemoryBuffer::getMemBufferCopy(Contents);

		  auto ParsedOrErr = OffloadBinary::create(*Storage);
		  if (ParsedOrErr)
		    return OffloadFile(std::move(*ParsedOrErr), std::move(Storage));
		  return ParsedOrErr.takeError();
		}

/// We use the Triple and Architecture pair to group linker inputs together.		/// We use the Triple and Architecture pair to group linker inputs together.
/// This conversion function lets us use these inputs in a hash-map.		/// This conversion function lets us use these inputs in a hash-map.
operator TargetID() const {		operator TargetID() const {
return std::make_pair(getBinary()->getTriple(), getBinary()->getArch());		return std::make_pair(getBinary()->getTriple(), getBinary()->getArch());
}		}
};		};

		/// Queries if the target \p LHS is compatible with \p RHS for linking purposes.
		///
		/// Each target is a (triple, architecture) pair. Identical pairs are always
		/// compatible. Otherwise only AMDGPU targets can be compatible, and only in
		/// one direction: \p LHS may be linked into \p RHS when both use the same
		/// triple and processor and every feature setting named by \p LHS is also
		/// named, with the same sign, by \p RHS.
		///
		/// \param LHS the target of the input being considered.
		/// \param RHS the target of the link job it would be added to.
		/// \returns true if \p LHS can participate in a link for \p RHS.
		inline bool areTargetsCompatible(const OffloadFile::TargetID LHS,
		                                 const OffloadFile::TargetID RHS) {
		  if (LHS == RHS)
		    return true;

		  // Inputs built for different triples can never be linked together, no
		  // matter how similar their architecture strings look.
		  if (LHS.first != RHS.first)
		    return false;

		  // If the target is AMD we check the target IDs for compatibility. A target id
		  // is a string conforming to the following BNF syntax:
		  //
		  //  target-id ::= '<arch> ( : <feature> ( '+' | '-' ) )*'
		  //
		  // This is used to link mutually compatible architectures together.
		  llvm::Triple T(LHS.first);
		  if (!T.isAMDGPU())
		    return false;

		  // Compare the processor names exactly. A plain substring test would accept
		  // an empty architecture or a processor whose name is merely a prefix of the
		  // other one.
		  std::pair<StringRef, StringRef> LHSParts = LHS.second.split(':');
		  std::pair<StringRef, StringRef> RHSParts = RHS.second.split(':');
		  if (LHSParts.first.empty() || LHSParts.first != RHSParts.first)
		    return false;

		  // The targets are compatible if the features of LHS are a subset of those of
		  // RHS. Features are matched token by token so their order and adjacency in
		  // the target-id string do not matter.
		  StringRef Remaining = LHSParts.second;
		  while (!Remaining.empty()) {
		    std::pair<StringRef, StringRef> Next = Remaining.split(':');
		    Remaining = Next.second;
		    if (Next.first.empty())
		      continue;

		    bool Found = false;
		    StringRef Candidates = RHSParts.second;
		    while (!Candidates.empty() && !Found) {
		      std::pair<StringRef, StringRef> Candidate = Candidates.split(':');
		      Found = Candidate.first == Next.first;
		      Candidates = Candidate.second;
		    }
		    if (!Found)
		      return false;
		  }
		  return true;
		}

/// Extracts embedded device offloading code from a memory \p Buffer to a list		/// Extracts embedded device offloading code from a memory \p Buffer to a list
/// of \p Binaries.		/// of \p Binaries.
Error extractOffloadBinaries(MemoryBufferRef Buffer,		Error extractOffloadBinaries(MemoryBufferRef Buffer,
SmallVectorImpl<OffloadFile> &Binaries);		SmallVectorImpl<OffloadFile> &Binaries);

/// Convert a string \p Name to an image kind.		/// Convert a string \p Name to an image kind.
ImageKind getImageKind(StringRef Name);		ImageKind getImageKind(StringRef Name);

Show All 13 Lines

This is an archive of the discontinued LLVM Phabricator instance.

[LinkerWrapper] Support device binaries in multiple link jobs
Needs ReviewPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 532748

clang/lib/Driver/ToolChains/Clang.cpp

clang/test/Driver/amdgpu-openmp-toolchain.c

clang/test/Driver/linker-wrapper.c

clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp

clang/tools/clang-linker-wrapper/LinkerWrapperOpts.td

llvm/include/llvm/Object/OffloadBinary.h

This is an archive of the discontinued LLVM Phabricator instance.

[LinkerWrapper] Support device binaries in multiple link jobs
Needs ReviewPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 532748

clang/lib/Driver/ToolChains/Clang.cpp

clang/test/Driver/amdgpu-openmp-toolchain.c

clang/test/Driver/linker-wrapper.c

clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp

clang/tools/clang-linker-wrapper/LinkerWrapperOpts.td

llvm/include/llvm/Object/OffloadBinary.h

[LinkerWrapper] Support device binaries in multiple link jobs
Needs ReviewPublic