Diff 428621

clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp

Show First 20 Lines • Show All 160 Lines • ▼ Show 20 Lines
#define OFFLOAD_SECTION_MAGIC_STR ".llvm.offloading"		#define OFFLOAD_SECTION_MAGIC_STR ".llvm.offloading"

/// The magic offset for the first object inside CUDA's fatbinary. This can be		/// The magic offset for the first object inside CUDA's fatbinary. This can be
/// different but it should work for what is passed here.		/// different but it should work for what is passed here.
static constexpr unsigned FatbinaryOffset = 0x50;		static constexpr unsigned FatbinaryOffset = 0x50;

/// Information for a device offloading file extracted from the host.		/// Information for a device offloading file extracted from the host.
struct DeviceFile {		struct DeviceFile {
DeviceFile(StringRef Kind, StringRef TheTriple, StringRef Arch,		DeviceFile(OffloadKind Kind, StringRef TheTriple, StringRef Arch,
StringRef Filename, bool IsLibrary = false)		StringRef Filename, bool IsLibrary = false)
: Kind(Kind), TheTriple(TheTriple), Arch(Arch), Filename(Filename),		: Kind(Kind), TheTriple(TheTriple), Arch(Arch), Filename(Filename),
IsLibrary(IsLibrary) {}		IsLibrary(IsLibrary) {}

std::string Kind;		OffloadKind Kind;
std::string TheTriple;		std::string TheTriple;
std::string Arch;		std::string Arch;
std::string Filename;		std::string Filename;
bool IsLibrary;		bool IsLibrary;
};		};

namespace llvm {		namespace llvm {
/// Helper that allows DeviceFile to be used as a key in a DenseMap. For now we		/// Helper that allows DeviceFile to be used as a key in a DenseMap. For now we
/// assume device files with matching architectures and triples but different		/// assume device files with matching architectures and triples but different
/// offloading kinds should be handled together, this may not be true in the		/// offloading kinds should be handled together, this may not be true in the
/// future.		/// future.

		// Provide DenseMapInfo for OffloadKind.
		template <> struct DenseMapInfo<OffloadKind> {
		static inline OffloadKind getEmptyKey() { return OFK_LAST; }
		static inline OffloadKind getTombstoneKey() {
		tra [Unsubmitted, Not Done]: Extend `enum OffloadKind` to include these special kinds.
		return static_cast<OffloadKind>(OFK_LAST + 1);
		}
		static unsigned getHashValue(const OffloadKind &Val) { return Val * 37U; }
		tra [Unsubmitted, Not Done]: Is there a particular reason for multiplying by 37? Enum values by themselves should do the job just fine.
		jhuber6 (Author) [Unsubmitted, Done]: That's what LLVM does for the regular `DenseMapInfo<uint16_t>::getHashValue()`, so I just copied it here.
		tra [Unsubmitted, Not Done]: It would make sense for mapping the full range of uint16_t into a much smaller set of entries. In this case we're already dealing with a very small, densely packed set of values. For all practical purposes this is a convenient overkill; we could get by with just using a vector + direct indexing. We also don't care about hash collisions even if they happen. Removing the multiplication would not make much of a difference, but it would be one less question for the reader to ask when they look at this code.

		static bool isEqual(const OffloadKind &LHS, const OffloadKind &RHS) {
		return LHS == RHS;
		}
		};
template <> struct DenseMapInfo<DeviceFile> {		template <> struct DenseMapInfo<DeviceFile> {
static DeviceFile getEmptyKey() {		static DeviceFile getEmptyKey() {
return {DenseMapInfo<StringRef>::getEmptyKey(),		return {DenseMapInfo<OffloadKind>::getEmptyKey(),
		tra [Unsubmitted, Not Done]: Why do we limit ourselves to uint16_t here? Can't we just use `OffloadKind` itself and get rid of these casts?
DenseMapInfo<StringRef>::getEmptyKey(),		DenseMapInfo<StringRef>::getEmptyKey(),
DenseMapInfo<StringRef>::getEmptyKey(),		DenseMapInfo<StringRef>::getEmptyKey(),
DenseMapInfo<StringRef>::getEmptyKey()};		DenseMapInfo<StringRef>::getEmptyKey()};
}		}
static DeviceFile getTombstoneKey() {		static DeviceFile getTombstoneKey() {
return {DenseMapInfo<StringRef>::getTombstoneKey(),		return {DenseMapInfo<OffloadKind>::getTombstoneKey(),
DenseMapInfo<StringRef>::getTombstoneKey(),		DenseMapInfo<StringRef>::getTombstoneKey(),
DenseMapInfo<StringRef>::getTombstoneKey(),		DenseMapInfo<StringRef>::getTombstoneKey(),
DenseMapInfo<StringRef>::getTombstoneKey()};		DenseMapInfo<StringRef>::getTombstoneKey()};
}		}
static unsigned getHashValue(const DeviceFile &I) {		static unsigned getHashValue(const DeviceFile &I) {
return DenseMapInfo<StringRef>::getHashValue(I.TheTriple) ^		return DenseMapInfo<StringRef>::getHashValue(I.TheTriple) ^
DenseMapInfo<StringRef>::getHashValue(I.Arch);		DenseMapInfo<StringRef>::getHashValue(I.Arch);
}		}
Show All 25 Lines	std::string getMainExecutable(const char *Name) {
return sys::path::parent_path(COWPath).str();		return sys::path::parent_path(COWPath).str();
}		}

/// Extract the device file from the string '<kind>-<triple>-<arch>=<library>'.		/// Extract the device file from the string '<kind>-<triple>-<arch>=<library>'.
DeviceFile getBitcodeLibrary(StringRef LibraryStr) {		DeviceFile getBitcodeLibrary(StringRef LibraryStr) {
auto DeviceAndPath = StringRef(LibraryStr).split('=');		auto DeviceAndPath = StringRef(LibraryStr).split('=');
auto StringAndArch = DeviceAndPath.first.rsplit('-');		auto StringAndArch = DeviceAndPath.first.rsplit('-');
auto KindAndTriple = StringAndArch.first.split('-');		auto KindAndTriple = StringAndArch.first.split('-');
return DeviceFile(KindAndTriple.first, KindAndTriple.second,		return DeviceFile(getOffloadKind(KindAndTriple.first), KindAndTriple.second,
StringAndArch.second, DeviceAndPath.second);		StringAndArch.second, DeviceAndPath.second);
}		}

/// Get a temporary filename suitable for output.		/// Get a temporary filename suitable for output.
Error createOutputFile(const Twine &Prefix, StringRef Extension,		Error createOutputFile(const Twine &Prefix, StringRef Extension,
SmallString<128> &NewFilename) {		SmallString<128> &NewFilename) {
if (!SaveTemps) {		if (!SaveTemps) {
if (std::error_code EC =		if (std::error_code EC =
▲ Show 20 Lines • Show All 114 Lines • ▼ Show 20 Lines	while (Offset < Contents.size()) {
if (!OutputOrErr)		if (!OutputOrErr)
return OutputOrErr.takeError();		return OutputOrErr.takeError();
std::unique_ptr<FileOutputBuffer> Output = std::move(*OutputOrErr);		std::unique_ptr<FileOutputBuffer> Output = std::move(*OutputOrErr);
std::copy(Binary.getImage().bytes_begin(), Binary.getImage().bytes_end(),		std::copy(Binary.getImage().bytes_begin(), Binary.getImage().bytes_end(),
Output->getBufferStart());		Output->getBufferStart());
if (Error E = Output->commit())		if (Error E = Output->commit())
return E;		return E;

DeviceFiles.emplace_back(Kind, Binary.getTriple(), Binary.getArch(),		DeviceFiles.emplace_back(Binary.getOffloadKind(), Binary.getTriple(),
TempFile, IsLibrary);		Binary.getArch(), TempFile, IsLibrary);

Offset += Binary.getSize();		Offset += Binary.getSize();
}		}

return Error::success();		return Error::success();
}		}

Expected<Optional<std::string>>		Expected<Optional<std::string>>
▲ Show 20 Lines • Show All 307 Lines • ▼ Show 20 Lines	Expected<std::string> link(ArrayRef<std::string> InputFiles, Triple TheTriple,
for (StringRef Input : InputFiles)		for (StringRef Input : InputFiles)
CmdArgs.push_back(Input);		CmdArgs.push_back(Input);

if (Error Err = executeCommands(*NvlinkPath, CmdArgs))		if (Error Err = executeCommands(*NvlinkPath, CmdArgs))
return std::move(Err);		return std::move(Err);

return static_cast<std::string>(TempFile);		return static_cast<std::string>(TempFile);
}		}

		Expected<std::string> fatbinary(ArrayRef<StringRef> InputFiles,
		Triple TheTriple, ArrayRef<StringRef> Archs) {
		// NVPTX uses the fatbinary program to bundle the linked images.
		Expected<std::string> FatBinaryPath =
		findProgram("fatbinary", {CudaBinaryPath});
		if (!FatBinaryPath)
		return FatBinaryPath.takeError();

		// Create a new file to write the linked device image to.
		SmallString<128> TempFile;
		if (Error Err = createOutputFile(sys::path::filename(ExecutableName) +
		"-device-" + TheTriple.getArchName(),
		"fatbin", TempFile))
		return std::move(Err);

		BumpPtrAllocator Alloc;
		StringSaver Saver(Alloc);

		SmallVector<StringRef, 16> CmdArgs;
		CmdArgs.push_back(*FatBinaryPath);
		CmdArgs.push_back(TheTriple.isArch64Bit() ? "-64" : "-32");
		CmdArgs.push_back("--create");
		CmdArgs.push_back(TempFile);
		for (const auto &FileAndArch : llvm::zip(InputFiles, Archs))
		CmdArgs.push_back(Saver.save("--image=profile=" + std::get<1>(FileAndArch) +
		",file=" + std::get<0>(FileAndArch)));

		if (Error Err = executeCommands(*FatBinaryPath, CmdArgs))
		tra [Unsubmitted, Not Done]: We should have a way to pass extra options to fatbinary, too. E.g. we may want to use `--compress-all`. Also, we may need to pass through `-g` for debug builds. Oh. Debug builds. Makes me wonder if cuda-gdb will be able to find GPU binaries packaged by the new driver. If it does not, it will be a rather serious problem. It would likely affect various profiling tools the same way, too. Can you give it a try?
		jhuber6 (Author) [Unsubmitted, Done]: I was planning on implementing this stuff more generally when we get the new binary tool in D125165 landed. That will allow me to more generally put any number of command line arguments into the binary itself and fish it out here. We already support a janky version for the -Xcuda-ptxas option here, but it's a mess and I'm planning on getting rid of it. Is it okay to punt that into the future? Debug builds are another sore point I don't handle super well right now but will be addressed better with D125165. I haven't tested cuda-gdb, but I embed the fatbinary the same way that we do in non-rdc mode. I can read them with cuobjdump in the final executable so I'm assuming it's compatible.
		tra [Unsubmitted, Done]: (quoting jhuber6) "I can read them with cuobjdump in the final executable so I'm assuming it's compatible." We should be OK then.
		tra [Unsubmitted, Done]: (quoting jhuber6) "Debug builds are another sore point I don't handle super well right now but will be addressed better with D125165." OK.
		return std::move(Err);

		return static_cast<std::string>(TempFile);
		}
} // namespace nvptx		} // namespace nvptx
namespace amdgcn {		namespace amdgcn {
Expected<std::string> link(ArrayRef<std::string> InputFiles, Triple TheTriple,		Expected<std::string> link(ArrayRef<std::string> InputFiles, Triple TheTriple,
StringRef Arch) {		StringRef Arch) {
// AMDGPU uses lld to link device object files.		// AMDGPU uses lld to link device object files.
Expected<std::string> LLDPath =		Expected<std::string> LLDPath =
findProgram("lld", {getMainExecutable("lld")});		findProgram("lld", {getMainExecutable("lld")});
if (!LLDPath)		if (!LLDPath)
▲ Show 20 Lines • Show All 428 Lines • ▼ Show 20 Lines	Error linkBitcodeFiles(SmallVectorImpl<std::string> &InputFiles,
InputFiles = NewInputFiles;		InputFiles = NewInputFiles;

return Error::success();		return Error::success();
}		}

/// Runs the appropriate linking action on all the device files specified in \p		/// Runs the appropriate linking action on all the device files specified in \p
/// DeviceFiles. The linked device images are returned in \p LinkedImages.		/// DeviceFiles. The linked device images are returned in \p LinkedImages.
Error linkDeviceFiles(ArrayRef<DeviceFile> DeviceFiles,		Error linkDeviceFiles(ArrayRef<DeviceFile> DeviceFiles,
SmallVectorImpl<std::string> &LinkedImages) {		SmallVectorImpl<DeviceFile> &LinkedImages) {
// Get the list of inputs for a specific device.		// Get the list of inputs and active offload kinds for a specific device.
DenseMap<DeviceFile, SmallVector<std::string, 4>> LinkerInputMap;		DenseMap<DeviceFile, SmallVector<std::string, 4>> LinkerInputMap;
		DenseMap<DeviceFile, DenseSet<OffloadKind>> ActiveOffloadKinds;
SmallVector<DeviceFile, 4> LibraryFiles;		SmallVector<DeviceFile, 4> LibraryFiles;
for (auto &File : DeviceFiles) {		for (auto &File : DeviceFiles) {
if (File.IsLibrary)		if (File.IsLibrary) {
LibraryFiles.push_back(File);		LibraryFiles.push_back(File);
else		} else {
LinkerInputMap[File].push_back(File.Filename);		LinkerInputMap[File].push_back(File.Filename);
		ActiveOffloadKinds[File].insert(File.Kind);
		}
}		}

// Static libraries are loaded lazily as-needed, only add them if other files		// Static libraries are loaded lazily as-needed, only add them if other files
// are present.		// are present.
// TODO: We need to check the symbols as well, static libraries are only		// TODO: We need to check the symbols as well, static libraries are only
// loaded if they contain symbols that are currently undefined or common		// loaded if they contain symbols that are currently undefined or common
// in the symbol table.		// in the symbol table.
for (auto &File : LibraryFiles)		for (auto &File : LibraryFiles)
if (LinkerInputMap.count(File))		if (LinkerInputMap.count(File))
LinkerInputMap[File].push_back(File.Filename);		LinkerInputMap[File].push_back(File.Filename);

// Try to link each device toolchain.		// Try to link each device toolchain.
for (auto &LinkerInput : LinkerInputMap) {		for (auto &LinkerInput : LinkerInputMap) {
DeviceFile &File = LinkerInput.getFirst();		DeviceFile &File = LinkerInput.getFirst();
Triple TheTriple = Triple(File.TheTriple);		Triple TheTriple = Triple(File.TheTriple);
		auto &LinkerInputFiles = LinkerInput.getSecond();
bool WholeProgram = false;		bool WholeProgram = false;

// Run LTO on any bitcode files and replace the input with the result.		// Run LTO on any bitcode files and replace the input with the result.
if (Error Err = linkBitcodeFiles(LinkerInput.getSecond(), TheTriple,		if (Error Err = linkBitcodeFiles(LinkerInputFiles, TheTriple, File.Arch,
File.Arch, WholeProgram))		WholeProgram))
		tra [Unsubmitted, Not Done]: Nit. We should think of changing `linkBitcodeFiles` to return `llvm::ErrorOr<bool>` so we can return the `WholeArchive` value, instead of modifying it as an argument.
		jhuber6 (Author) [Unsubmitted, Done]: That's a good idea, I'm planning on cleaning a lot of this stuff up later.
return Err;		return Err;

// If we are embedding bitcode for JIT, skip the final device linking.
if (EmbedBitcode) {		if (EmbedBitcode) {
assert(!LinkerInput.getSecond().empty() && "No bitcode image to embed");		// If we are embedding bitcode for JIT, skip the final device linking.
LinkedImages.push_back(LinkerInput.getSecond().front());		if (LinkerInputFiles.size() != 1 || !WholeProgram)
		return createStringError(inconvertibleErrorCode(),
		tra [Unsubmitted, Not Done]: What's expected to happen if we have more than one input? If only one is ever expected, I'd add an assert.
		jhuber6 (Author) [Unsubmitted, Done]: This isn't used right now since JIT hasn't made it in. It should probably be a proper error honestly.
		"Unable to embed bitcode image for JIT");
		LinkedImages.emplace_back(OFK_OpenMP, TheTriple.getTriple(), File.Arch,
		LinkerInputFiles.front());
continue;		continue;
}		} else if (WholeProgram && TheTriple.isNVPTX()) {
		tra [Unsubmitted, Not Done]: Should `EmbedBitcode` be mutually exclusive vs `WholeArchive`? Can we ever end up with both unset?
		jhuber6 (Author) [Unsubmitted, Done]: `EmbedBitcode` might need to require `WholeArchive` considering that it's supposed to be a completely linked image that can be sent to a JIT engine.
		// If we performed LTO on NVPTX and had whole program visibility, we can
// If we performed LTO on NVPTX and had whole program visibility, we can use		// use CUDA in non-RDC mode.
// CUDA in non-RDC mode.		if (LinkerInputFiles.size() != 1)
		tra [Unsubmitted, Not Done]: Ditto about the assert on the number of inputs.
if (WholeProgram && TheTriple.isNVPTX()) {		return createStringError(inconvertibleErrorCode(),
assert(!LinkerInput.getSecond().empty() && "No non-RDC image to embed");		"Invalid number of inputs for non-RDC mode");
LinkedImages.push_back(LinkerInput.getSecond().front());		for (OffloadKind Kind : ActiveOffloadKinds[LinkerInput.getFirst()])
		LinkedImages.emplace_back(Kind, TheTriple.getTriple(), File.Arch,
		LinkerInputFiles.front());
continue;		continue;
}		}

auto ImageOrErr = linkDevice(LinkerInput.getSecond(), TheTriple, File.Arch);		auto ImageOrErr = linkDevice(LinkerInputFiles, TheTriple, File.Arch);
if (!ImageOrErr)		if (!ImageOrErr)
return ImageOrErr.takeError();		return ImageOrErr.takeError();

LinkedImages.push_back(*ImageOrErr);		// Create separate images for all the active offload kinds.
		for (OffloadKind Kind : ActiveOffloadKinds[LinkerInput.getFirst()])
		LinkedImages.emplace_back(Kind, TheTriple.getTriple(), File.Arch,
		*ImageOrErr);
}		}
return Error::success();		return Error::success();
}		}

// Compile the module to an object file using the appropriate target machine for		// Compile the module to an object file using the appropriate target machine for
// the host triple.		// the host triple.
Expected<std::string> compileModule(Module &M) {		Expected<std::string> compileModule(Module &M) {
std::string Msg;		std::string Msg;
Show All 27 Lines	Expected<std::string> compileModule(Module &M) {
if (TM->addPassesToEmitFile(CodeGenPasses, *OS, nullptr, CGFT_ObjectFile))		if (TM->addPassesToEmitFile(CodeGenPasses, *OS, nullptr, CGFT_ObjectFile))
return createStringError(inconvertibleErrorCode(),		return createStringError(inconvertibleErrorCode(),
"Failed to execute host backend");		"Failed to execute host backend");
CodeGenPasses.run(M);		CodeGenPasses.run(M);

return static_cast<std::string>(ObjectFile);		return static_cast<std::string>(ObjectFile);
}		}

/// Creates the object file containing the device image and runtime registration		/// Load all of the OpenMP images into a buffer and pass it to the binary
/// code from the device images stored in \p Images.		/// wrapping function to create the registration code in the module \p M.
Expected<std::string> wrapDeviceImages(ArrayRef<std::string> Images) {		Error wrapOpenMPImages(Module &M, ArrayRef<DeviceFile> Images) {
SmallVector<std::unique_ptr<MemoryBuffer>, 4> SavedBuffers;		SmallVector<std::unique_ptr<MemoryBuffer>, 4> SavedBuffers;
SmallVector<ArrayRef<char>, 4> ImagesToWrap;		SmallVector<ArrayRef<char>, 4> ImagesToWrap;
		for (const DeviceFile &File : Images) {
for (StringRef ImageFilename : Images) {
llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> ImageOrError =		llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> ImageOrError =
llvm::MemoryBuffer::getFileOrSTDIN(ImageFilename);		llvm::MemoryBuffer::getFileOrSTDIN(File.Filename);
if (std::error_code EC = ImageOrError.getError())		if (std::error_code EC = ImageOrError.getError())
return createFileError(ImageFilename, EC);		return createFileError(File.Filename, EC);
ImagesToWrap.emplace_back((*ImageOrError)->getBufferStart(),		ImagesToWrap.emplace_back((*ImageOrError)->getBufferStart(),
(*ImageOrError)->getBufferSize());		(*ImageOrError)->getBufferSize());
SavedBuffers.emplace_back(std::move(*ImageOrError));		SavedBuffers.emplace_back(std::move(*ImageOrError));
}		}

		if (Error Err = wrapOpenMPBinaries(M, ImagesToWrap))
		return Err;
		return Error::success();
		}

		/// Combine all of the CUDA images into a single fatbinary and pass it to the
		/// binary wrapping function to create the registration code in the module \p M.
		Error wrapCudaImages(Module &M, ArrayRef<DeviceFile> Images) {
		SmallVector<StringRef, 4> InputFiles;
		SmallVector<StringRef, 4> Architectures;
		for (const DeviceFile &File : Images) {
		InputFiles.push_back(File.Filename);
		Architectures.push_back(File.Arch);
		}

		// CUDA expects its embedded device images to be a fatbinary.
		Triple TheTriple = Triple(Images.front().TheTriple);
		auto FileOrErr = nvptx::fatbinary(InputFiles, TheTriple, Architectures);
		if (!FileOrErr)
		return FileOrErr.takeError();

		llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> ImageOrError =
		llvm::MemoryBuffer::getFileOrSTDIN(*FileOrErr);
		if (std::error_code EC = ImageOrError.getError())
		return createFileError(*FileOrErr, EC);

		auto ImageToWrap = ArrayRef<char>((*ImageOrError)->getBufferStart(),
		(*ImageOrError)->getBufferSize());

		if (Error Err = wrapCudaBinary(M, ImageToWrap))
		return Err;
		return Error::success();
		}

		/// Creates the object file containing the device image and runtime
		/// registration code from the device images stored in \p Images.
		Expected<SmallVector<std::string, 2>>
		wrapDeviceImages(ArrayRef<DeviceFile> Images) {
		DenseMap<OffloadKind, SmallVector<DeviceFile, 2>> ImagesForKind;
		for (const DeviceFile &Image : Images)
		ImagesForKind[Image.Kind].push_back(Image);

		SmallVector<std::string, 2> WrappedImages;
		for (const auto &KindAndImages : ImagesForKind) {
LLVMContext Context;		LLVMContext Context;
Module M("offload.wrapper.module", Context);		Module M("offload.wrapper.module", Context);
M.setTargetTriple(HostTriple);		M.setTargetTriple(HostTriple);
if (Error Err = wrapBinaries(M, ImagesToWrap))
		// Create registration code for the given offload kinds in the Module.
		switch (KindAndImages.getFirst()) {
		case OFK_OpenMP:
		if (Error Err = wrapOpenMPImages(M, KindAndImages.getSecond()))
		return std::move(Err);
		break;
		case OFK_Cuda:
		if (Error Err = wrapCudaImages(M, KindAndImages.getSecond()))
return std::move(Err);		return std::move(Err);
		break;
		default:
		return createStringError(inconvertibleErrorCode(),
		getOffloadKindName(KindAndImages.getFirst()) +
		" wrapping is not supported");
		}

if (PrintWrappedModule)		if (PrintWrappedModule)
llvm::errs() << M;		llvm::errs() << M;

		tra [Unsubmitted, Not Done]: The `M` contains generated registration glue at this point, right? It may be worth a comment to explain what it is that we're compiling here.
		jhuber6 (Author) [Unsubmitted, Done]: Sure.
return compileModule(M);		auto FileOrErr = compileModule(M);
		if (!FileOrErr)
		return FileOrErr.takeError();
		WrappedImages.push_back(*FileOrErr);
		}

		return WrappedImages;
}		}

Optional<std::string> findFile(StringRef Dir, const Twine &Name) {		Optional<std::string> findFile(StringRef Dir, const Twine &Name) {
SmallString<128> Path;		SmallString<128> Path;
if (Dir.startswith("="))		if (Dir.startswith("="))
sys::path::append(Path, Sysroot, Dir.substr(1), Name);		sys::path::append(Path, Sysroot, Dir.substr(1), Name);
else		else
sys::path::append(Path, Dir, Name);		sys::path::append(Path, Dir, Name);
▲ Show 20 Lines • Show All 114 Lines • ▼ Show 20 Lines	for (std::string &Arg : LinkerArgs) {
}		}
}		}

// Add the device bitcode libraries to the device files if any were passed in.		// Add the device bitcode libraries to the device files if any were passed in.
for (StringRef LibraryStr : BitcodeLibraries)		for (StringRef LibraryStr : BitcodeLibraries)
DeviceFiles.push_back(getBitcodeLibrary(LibraryStr));		DeviceFiles.push_back(getBitcodeLibrary(LibraryStr));

// Link the device images extracted from the linker input.		// Link the device images extracted from the linker input.
SmallVector<std::string, 16> LinkedImages;		SmallVector<DeviceFile, 4> LinkedImages;
if (Error Err = linkDeviceFiles(DeviceFiles, LinkedImages))		if (Error Err = linkDeviceFiles(DeviceFiles, LinkedImages))
return reportError(std::move(Err));		return reportError(std::move(Err));

// Wrap each linked device image into a linkable host binary and add it to the		// Wrap each linked device image into a linkable host binary and add it to the
// link job's inputs.		// link job's inputs.
auto FileOrErr = wrapDeviceImages(LinkedImages);		auto FileOrErr = wrapDeviceImages(LinkedImages);
if (!FileOrErr)		if (!FileOrErr)
return reportError(FileOrErr.takeError());		return reportError(FileOrErr.takeError());
LinkerArgs.push_back(*FileOrErr);		LinkerArgs.append(*FileOrErr);

// Run the host linking job.		// Run the host linking job.
if (Error Err = runLinker(LinkerUserPath, LinkerArgs))		if (Error Err = runLinker(LinkerUserPath, LinkerArgs))
return reportError(std::move(Err));		return reportError(std::move(Err));

// Remove the temporary files created.		// Remove the temporary files created.
for (const auto &TempFile : TempFiles)		for (const auto &TempFile : TempFiles)
if (std::error_code EC = sys::fs::remove(TempFile))		if (std::error_code EC = sys::fs::remove(TempFile))
reportError(createFileError(TempFile, EC));		reportError(createFileError(TempFile, EC));

return EXIT_SUCCESS;		return EXIT_SUCCESS;
}		}

clang/tools/clang-linker-wrapper/OffloadWrapper.h

	//===- OffloadWrapper.h -------------------------------------------- C++ --===//			//===- OffloadWrapper.h --r-------------------------------------- C++ --===//
	//			//
	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.			// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
	// See https://llvm.org/LICENSE.txt for license information.			// See https://llvm.org/LICENSE.txt for license information.
	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception			// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
	//			//
	//===----------------------------------------------------------------------===//			//===----------------------------------------------------------------------===//

	#ifndef LLVM_CLANG_TOOLS_CLANG_LINKER_WRAPPER_OFFLOAD_WRAPPER_H			#ifndef LLVM_CLANG_TOOLS_CLANG_LINKER_WRAPPER_OFFLOAD_WRAPPER_H
	#define LLVM_CLANG_TOOLS_CLANG_LINKER_WRAPPER_OFFLOAD_WRAPPER_H			#define LLVM_CLANG_TOOLS_CLANG_LINKER_WRAPPER_OFFLOAD_WRAPPER_H

	#include "llvm/ADT/ArrayRef.h"			#include "llvm/ADT/ArrayRef.h"
	#include "llvm/IR/Module.h"			#include "llvm/IR/Module.h"

	/// Wrap the input device images into the module \p M as global symbols and			/// Wraps the input device images into the module \p M as global symbols and
	/// registers the images with the OpenMP Offloading runtime libomptarget.			/// registers the images with the OpenMP Offloading runtime libomptarget.
	llvm::Error wrapBinaries(llvm::Module &M,			llvm::Error wrapOpenMPBinaries(llvm::Module &M,
	llvm::ArrayRef<llvm::ArrayRef<char>> Images);			llvm::ArrayRef<llvm::ArrayRef<char>> Images);

				/// Wraps the input fatbinary image into the module \p M as global symbols and
				tra [Unsubmitted, Not Done]: It should be either "Wraps/registers" or "Wrap/register".
				/// registers the images with the CUDA runtime.
				llvm::Error wrapCudaBinary(llvm::Module &M, llvm::ArrayRef<char> Images);

	#endif			#endif

clang/tools/clang-linker-wrapper/OffloadWrapper.cpp

Show First 20 Lines • Show All 251 Lines • ▼ Show 20 Lines	void createUnregisterFunction(Module &M, GlobalVariable *BinDesc) {

// Add this function to global destructors.		// Add this function to global destructors.
// Match priority of __tgt_register_lib		// Match priority of __tgt_register_lib
appendToGlobalDtors(M, Func, /*Priority*/ 1);		appendToGlobalDtors(M, Func, /*Priority*/ 1);
}		}

} // namespace		} // namespace

Error wrapBinaries(Module &M, ArrayRef<ArrayRef<char>> Images) {		Error wrapOpenMPBinaries(Module &M, ArrayRef<ArrayRef<char>> Images) {
GlobalVariable *Desc = createBinDesc(M, Images);		GlobalVariable *Desc = createBinDesc(M, Images);
if (!Desc)		if (!Desc)
return createStringError(inconvertibleErrorCode(),		return createStringError(inconvertibleErrorCode(),
"No binary descriptors created.");		"No binary descriptors created.");
createRegisterFunction(M, Desc);		createRegisterFunction(M, Desc);
createUnregisterFunction(M, Desc);		createUnregisterFunction(M, Desc);
return Error::success();		return Error::success();
}		}

		llvm::Error wrapCudaBinary(llvm::Module &M, llvm::ArrayRef<char> Images) {
		// TODO: Implement this.
		tra [Unsubmitted, Not Done]: A "not-implemented-yet/TODO" comment would be appropriate here.
		return Error::success();
		}

llvm/include/llvm/Object/OffloadBinary.h

	Show All 25 Lines
	namespace llvm {			namespace llvm {

	/// The producer of the associated offloading image.			/// The producer of the associated offloading image.
	enum OffloadKind : uint16_t {			enum OffloadKind : uint16_t {
	OFK_None = 0,			OFK_None = 0,
	OFK_OpenMP,			OFK_OpenMP,
	OFK_Cuda,			OFK_Cuda,
	OFK_HIP,			OFK_HIP,
				OFK_LAST,
	};			};

	/// The type of contents the offloading image contains.			/// The type of contents the offloading image contains.
	enum ImageKind : uint16_t {			enum ImageKind : uint16_t {
	IMG_None = 0,			IMG_None = 0,
	IMG_Object,			IMG_Object,
	IMG_Bitcode,			IMG_Bitcode,
	IMG_Cubin,			IMG_Cubin,
	IMG_Fatbinary,			IMG_Fatbinary,
	IMG_PTX,			IMG_PTX,
				IMG_LAST,
	};			};

	/// A simple binary serialization of an offloading file. We use this format to			/// A simple binary serialization of an offloading file. We use this format to
	/// embed the offloading image into the host executable so it can be extracted			/// embed the offloading image into the host executable so it can be extracted
	/// and used by the linker.			/// and used by the linker.
	///			///
	/// Many of these could be stored in the same section by the time the linker			/// Many of these could be stored in the same section by the time the linker
	/// sees it so we mark this information with a header. The version is used to			/// sees it so we mark this information with a header. The version is used to
	▲ Show 20 Lines • Show All 98 Lines • Show Last 20 Lines

This is an archive of the discontinued LLVM Phabricator instance.

[Cuda] Add initial support for wrapping CUDA images in the new driver.
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 428621

clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp

clang/tools/clang-linker-wrapper/OffloadWrapper.h

clang/tools/clang-linker-wrapper/OffloadWrapper.cpp

llvm/include/llvm/Object/OffloadBinary.h

This is an archive of the discontinued LLVM Phabricator instance.

[Cuda] Add initial support for wrapping CUDA images in the new driver.
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 428621

clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp

clang/tools/clang-linker-wrapper/OffloadWrapper.h

clang/tools/clang-linker-wrapper/OffloadWrapper.cpp

llvm/include/llvm/Object/OffloadBinary.h

[Cuda] Add initial support for wrapping CUDA images in the new driver.
ClosedPublic