This is an archive of the discontinued LLVM Phabricator instance.

Use allocate_file in FileOutputBuffer to avoid possible bus error.
Needs ReviewPublic

Authored by ruiu on Nov 1 2017, 3:33 PM.

Download Raw Diff

Details

Reviewers

mgorny
andrewrk
emaste
kettenis
• espindola

Summary

This patch makes FileOutputBuffer use fallocate(2) or an equivalent
before mmap'ing a temporary file. If the operating system or the filesystem
does not support fallocate(2), FileOutputBuffer now creates an in-memory
buffer instead of a temporary file.

As a result, after committing this patch, all disk full errors are
gracefully handled by LLVM (including lld). Previously, we could be
killed by a bus error when a disk becomes full while writing to a sparse
file. That will never happen with this patch.

In return for better error handling, this patch disables mmap IO on
systems that don't support fallocate(2) or equivalent. That means most
Unix systems other than macOS and Linux. On such systems, FileOutputBuffer
creates an in-memory buffer and writes it to an output file using write(2)
on commit(). That is inefficient compared to mmap, but in practice the
difference is visible only when FileOutputBuffer is used by lld and lld is
creating a huge executable, because in most cases IO speed is not a
limiting factor.

So, there's a tradeoff. Which do we prefer: maximum IO performance or
reliable error handling? I think I can be convinced either way, but
I'm leaning towards reliable error handling, because the way LLVM
tools that use FileOutputBuffer die when a disk is full is really bad
and pretty user-unfriendly.

I wish many more Unix systems would start supporting fallocate(2).

Depends on https://reviews.llvm.org/D39464.

Diff Detail

Build Status

Buildable 11729
Build 11729: arc lint + arc unit

Event Timeline

ruiu created this revision.Nov 1 2017, 3:33 PM

Herald added a subscriber: hiraditya. · View Herald TranscriptNov 1 2017, 3:33 PM

ruiu added a reviewer: kettenis.Nov 1 2017, 3:38 PM

Which we prefer, maximum IO performance or a reliable error handling?

Zig project prefers reliable error handling.

• espindola edited reviewers, added: • espindola; removed: • rafael.Mar 15 2018, 9:06 AM

Revision Contents

Path

Size

llvm/

lib/

Support/

FileOutputBuffer.cpp

88 lines

Diff 121200

llvm/lib/Support/FileOutputBuffer.cpp

Show All 32 Lines
// as the final output file. The final output file is atomically replaced		// as the final output file. The final output file is atomically replaced
// with the temporary file on commit().		// with the temporary file on commit().
class OnDiskBuffer : public FileOutputBuffer {		class OnDiskBuffer : public FileOutputBuffer {
public:		public:
OnDiskBuffer(StringRef Path, StringRef TempPath,		OnDiskBuffer(StringRef Path, StringRef TempPath,
std::unique_ptr<fs::mapped_file_region> Buf)		std::unique_ptr<fs::mapped_file_region> Buf)
: FileOutputBuffer(Path), Buffer(std::move(Buf)), TempPath(TempPath) {}		: FileOutputBuffer(Path), Buffer(std::move(Buf)), TempPath(TempPath) {}

static ErrorOr<std::unique_ptr<OnDiskBuffer>>
create(StringRef Path, size_t Size, unsigned Mode);

uint8_t *getBufferStart() const override { return (uint8_t *)Buffer->data(); }		uint8_t *getBufferStart() const override { return (uint8_t *)Buffer->data(); }

uint8_t *getBufferEnd() const override {		uint8_t *getBufferEnd() const override {
return (uint8_t *)Buffer->data() + Buffer->size();		return (uint8_t *)Buffer->data() + Buffer->size();
}		}

size_t getBufferSize() const override { return Buffer->size(); }		size_t getBufferSize() const override { return Buffer->size(); }

Show All 21 Lines

// A FileOutputBuffer which keeps data in memory and writes to the final		// A FileOutputBuffer which keeps data in memory and writes to the final
// output file on commit(). This is used only when we cannot use OnDiskBuffer.		// output file on commit(). This is used only when we cannot use OnDiskBuffer.
class InMemoryBuffer : public FileOutputBuffer {		class InMemoryBuffer : public FileOutputBuffer {
public:		public:
InMemoryBuffer(StringRef Path, MemoryBlock Buf, unsigned Mode)		InMemoryBuffer(StringRef Path, MemoryBlock Buf, unsigned Mode)
: FileOutputBuffer(Path), Buffer(Buf), Mode(Mode) {}		: FileOutputBuffer(Path), Buffer(Buf), Mode(Mode) {}

static ErrorOr<std::unique_ptr<InMemoryBuffer>>
create(StringRef Path, size_t Size, unsigned Mode) {
std::error_code EC;
MemoryBlock MB = Memory::allocateMappedMemory(
Size, nullptr, sys::Memory::MF_READ \| sys::Memory::MF_WRITE, EC);
if (EC)
return EC;
return llvm::make_unique<InMemoryBuffer>(Path, MB, Mode);
}

uint8_t *getBufferStart() const override { return (uint8_t *)Buffer.base(); }		uint8_t *getBufferStart() const override { return (uint8_t *)Buffer.base(); }

uint8_t *getBufferEnd() const override {		uint8_t *getBufferEnd() const override {
return (uint8_t *)Buffer.base() + Buffer.size();		return (uint8_t *)Buffer.base() + Buffer.size();
}		}

size_t getBufferSize() const override { return Buffer.size(); }		size_t getBufferSize() const override { return Buffer.size(); }

std::error_code commit() override {		std::error_code commit() override {
int FD;		int FD;
std::error_code EC;		std::error_code EC;
if (auto EC = openFileForWrite(FinalPath, FD, fs::F_None, Mode))		if (auto EC = openFileForWrite(FinalPath, FD, fs::F_None, Mode))
return EC;		return EC;
raw_fd_ostream OS(FD, /*shouldClose=*/true, /*unbuffered=*/true);		raw_fd_ostream OS(FD, /*shouldClose=*/true, /*unbuffered=*/true);
OS << StringRef((const char *)Buffer.base(), Buffer.size());		OS << StringRef((const char *)Buffer.base(), Buffer.size());
return std::error_code();		return std::error_code();
}		}

private:		private:
OwningMemoryBlock Buffer;		OwningMemoryBlock Buffer;
unsigned Mode;		unsigned Mode;
};		};

ErrorOr<std::unique_ptr<OnDiskBuffer>>		static ErrorOr<std::unique_ptr<FileOutputBuffer>>
OnDiskBuffer::create(StringRef Path, size_t Size, unsigned Mode) {		createInMemoryBuffer(StringRef Path, size_t Size, unsigned Mode) {
// Create new file in same directory but with random name.
SmallString<128> TempPath;
int FD;
if (auto EC = fs::createUniqueFile(Path + ".tmp%%%%%%%", FD, TempPath, Mode))
return EC;

sys::RemoveFileOnSignal(TempPath);

#ifndef LLVM_ON_WIN32
// On Windows, CreateFileMapping (the mmap function on Windows)
// automatically extends the underlying file. We don't need to
// extend the file beforehand. _chsize (ftruncate on Windows) is
// pretty slow just like it writes specified amount of bytes,
// so we should avoid calling that function.
if (auto EC = fs::resize_file(FD, Size))
return EC;
#endif

// Mmap it.
std::error_code EC;		std::error_code EC;
auto MappedFile = llvm::make_unique<fs::mapped_file_region>(		MemoryBlock MB = Memory::allocateMappedMemory(
FD, fs::mapped_file_region::readwrite, Size, 0, EC);		Size, nullptr, sys::Memory::MF_READ \| sys::Memory::MF_WRITE, EC);
close(FD);
if (EC)		if (EC)
return EC;		return EC;
return llvm::make_unique<OnDiskBuffer>(Path, TempPath, std::move(MappedFile));		return llvm::make_unique<InMemoryBuffer>(Path, MB, Mode);
}		}

// Create an instance of FileOutputBuffer.		// Create an instance of FileOutputBuffer.
ErrorOr<std::unique_ptr<FileOutputBuffer>>		ErrorOr<std::unique_ptr<FileOutputBuffer>>
FileOutputBuffer::create(StringRef Path, size_t Size, unsigned Flags) {		FileOutputBuffer::create(StringRef Path, size_t Size, unsigned Flags) {
unsigned Mode = fs::all_read \| fs::all_write;		unsigned Mode = fs::all_read \| fs::all_write;
if (Flags & F_executable)		if (Flags & F_executable)
Mode \|= fs::all_exe;		Mode \|= fs::all_exe;
Show All 9 Lines	FileOutputBuffer::create(StringRef Path, size_t Size, unsigned Flags) {
// use rename (e.g. we don't want to replace /dev/null with a regular		// use rename (e.g. we don't want to replace /dev/null with a regular
// file.) If that's the case, we create an in-memory buffer, open the		// file.) If that's the case, we create an in-memory buffer, open the
// destination file and write to it on commit().		// destination file and write to it on commit().
switch (Stat.type()) {		switch (Stat.type()) {
case fs::file_type::directory_file:		case fs::file_type::directory_file:
return errc::is_a_directory;		return errc::is_a_directory;
case fs::file_type::regular_file:		case fs::file_type::regular_file:
case fs::file_type::file_not_found:		case fs::file_type::file_not_found:
case fs::file_type::status_error:		case fs::file_type::status_error: {
return OnDiskBuffer::create(Path, Size, Mode);		// Create new file in same directory but with random name.
		SmallString<128> TempPath;
		int FD;
		if (auto EC = fs::createUniqueFile(Path + ".tmp%%%%%%%", FD, TempPath, Mode))
		return EC;

		sys::RemoveFileOnSignal(TempPath);

		#ifndef LLVM_ON_WIN32
		// If you mmap a sparse file for writing and the disk becomes full
		// when writing to an unallocated block of the file, you'll receive
		// a signal (which is usually SIGBUS). There's no reliable and
		// portable way to gracefully handle such disk full situation. So,
		// in order to avoid it, we preallocate all disk blocks by calling
		// fallocate(2) or equivalent.
		//
		// If an operating system or a filesystem don't support fallocate,
		// we use in-memory buffer so that we can catch disk full error on
		// commit().
		//
		// On Windows, CreateFileMapping (the mmap function on Windows)
		// automatically extends the underlying file. We don't need to
		// extend the file beforehand, and calling _chsize (which is slow)
		// beforehand is just a waste of time.
		if (auto EC = fs::allocate_file(FD, Size)) {
		if (EC == errc::function_not_supported)
		return createInMemoryBuffer(Path, Size, Mode);
		return EC;
		}
		#endif

		// Mmap it.
		std::error_code EC;
		auto MappedFile = llvm::make_unique<fs::mapped_file_region>(
		FD, fs::mapped_file_region::readwrite, Size, 0, EC);
		close(FD);
		if (EC)
		return EC;
		return llvm::make_unique<OnDiskBuffer>(Path, TempPath, std::move(MappedFile));
		}
default:		default:
return InMemoryBuffer::create(Path, Size, Mode);		return createInMemoryBuffer(Path, Size, Mode);
}		}
}		}