diff --git a/llvm/include/llvm/Support/raw_ostream.h b/llvm/include/llvm/Support/raw_ostream.h
--- a/llvm/include/llvm/Support/raw_ostream.h
+++ b/llvm/include/llvm/Support/raw_ostream.h
@@ -40,6 +40,7 @@
 enum OpenFlags : unsigned;
 enum CreationDisposition : unsigned;
 class FileLocker;
+class mapped_file_region;
 } // end namespace fs
 } // end namespace sys
 
@@ -133,6 +134,18 @@
 
   OStreamKind get_kind() const { return Kind; }
 
+  /// If possible, preallocate space for stream data when the final size
+  /// is already known. Writing more data to the stream than was reserved
+  /// is not allowed. The stream is set to be unbuffered once the space
+  /// has been reserved.
+  virtual void reserve(uint64_t Size) {
+    assert(!ReservedSize && "Buffer space is already reserved.");
+    assert(tell() == 0 &&
+           "Can`t reserve space if any data is already written.");
+    ReservedSize = Size;
+    SetUnbuffered();
+  }
+
   //===--------------------------------------------------------------------===//
   // Configuration Interface
   //===--------------------------------------------------------------------===//
@@ -357,6 +370,8 @@
   /// unbuffered.
   const char *getBufferStart() const { return OutBufStart; }
 
+  Optional<uint64_t> ReservedSize;
+
   //===--------------------------------------------------------------------===//
   // Private Interface
   //===--------------------------------------------------------------------===//
@@ -453,6 +468,8 @@
   void anchor() override;
 
 protected:
+  std::unique_ptr<sys::fs::mapped_file_region> MMapFile;
+
   /// Set the flag indicating that an output error has been encountered.
   void error_detected(std::error_code EC) { this->EC = EC; }
 
@@ -554,6 +571,15 @@
   /// It is used as @ref lock.
   LLVM_NODISCARD
   Expected<sys::fs::FileLocker> tryLockFor(std::chrono::milliseconds Timeout);
+
+  /// If possible, preallocate space for stream data when the final size
+  /// is already known. Writing more data to the stream than was reserved
+  /// is not allowed. The stream is set to be unbuffered if the space was
+  /// successfully reserved. raw_fd_ostream uses a memory-mapped file as
+  /// the buffer where the data is stored. The sys::fs::FA_Read permission
+  /// is required to create the memory-mapped file buffer; if the mapping
+  /// cannot be created, the request is silently ignored.
+  void reserve(uint64_t Size) override;
 };
 
 /// This returns a reference to a raw_fd_ostream for standard output. Use it
@@ -626,6 +652,15 @@
     flush();
     return OS;
   }
+
+  /// If possible, preallocate space for stream data when the final size
+  /// is already known. Writing more data to the stream than was reserved
+  /// is not allowed. The stream is set to be unbuffered once the space
+  /// has been reserved.
+  void reserve(uint64_t Size) override {
+    raw_ostream::reserve(Size);
+    OS.reserve(Size);
+  }
 };
 
 /// A raw_ostream that writes to an SmallVector or SmallString. This is a
@@ -659,6 +694,15 @@
 
   /// Return a StringRef for the vector contents.
   StringRef str() const { return StringRef(OS.data(), OS.size()); }
+
+  /// If possible, preallocate space for stream data when the final size
+  /// is already known. Writing more data to the stream than was reserved
+  /// is not allowed. The stream is set to be unbuffered once the space
+  /// has been reserved.
+  void reserve(uint64_t Size) override {
+    raw_ostream::reserve(Size);
+    OS.reserve(Size);
+  }
 };
 
 /// A raw_ostream that discards all output.
diff --git a/llvm/lib/Support/raw_ostream.cpp b/llvm/lib/Support/raw_ostream.cpp
--- a/llvm/lib/Support/raw_ostream.cpp
+++ b/llvm/lib/Support/raw_ostream.cpp
@@ -662,6 +662,8 @@
 raw_fd_ostream::~raw_fd_ostream() {
   if (FD >= 0) {
     flush();
+    if (MMapFile)
+      MMapFile.reset();
     if (ShouldClose) {
       if (auto EC = sys::Process::SafelyCloseFileDescriptor(FD))
         error_detected(EC);
@@ -737,64 +739,74 @@
 
 void raw_fd_ostream::write_impl(const char *Ptr, size_t Size) {
   assert(FD >= 0 && "File already closed.");
-  pos += Size;
+  assert((!ReservedSize || pos + Size <= *ReservedSize) &&
+         "Writing over reserved area.");
+
+  if (MMapFile) {
+    memcpy(MMapFile->data() + pos, Ptr, Size);
+    pos += Size;
+  } else {
+    pos += Size;
 
 #if defined(_WIN32)
-  // If this is a Windows console device, try re-encoding from UTF-8 to UTF-16
-  // and using WriteConsoleW. If that fails, fall back to plain write().
-  if (IsWindowsConsole)
-    if (write_console_impl(FD, StringRef(Ptr, Size)))
-      return;
+    // If this is a Windows console device, try re-encoding from UTF-8 to UTF-16
+    // and using WriteConsoleW. If that fails, fall back to plain write().
+    if (IsWindowsConsole)
+      if (write_console_impl(FD, StringRef(Ptr, Size)))
+        return;
 #endif
 
-  // The maximum write size is limited to INT32_MAX. A write
-  // greater than SSIZE_MAX is implementation-defined in POSIX,
-  // and Windows _write requires 32 bit input.
-  size_t MaxWriteSize = INT32_MAX;
+    // The maximum write size is limited to INT32_MAX. A write
+    // greater than SSIZE_MAX is implementation-defined in POSIX,
+    // and Windows _write requires 32 bit input.
+    size_t MaxWriteSize = INT32_MAX;
 
 #if defined(__linux__)
-  // It is observed that Linux returns EINVAL for a very large write (>2G).
-  // Make it a reasonably small value.
-  MaxWriteSize = 1024 * 1024 * 1024;
+    // It is observed that Linux returns EINVAL for a very large write (>2G).
+    // Make it a reasonably small value.
+    MaxWriteSize = 1024 * 1024 * 1024;
 #endif
 
-  do {
-    size_t ChunkSize = std::min(Size, MaxWriteSize);
-    ssize_t ret = ::write(FD, Ptr, ChunkSize);
-
-    if (ret < 0) {
-      // If it's a recoverable error, swallow it and retry the write.
-      //
-      // Ideally we wouldn't ever see EAGAIN or EWOULDBLOCK here, since
-      // raw_ostream isn't designed to do non-blocking I/O. However, some
-      // programs, such as old versions of bjam, have mistakenly used
-      // O_NONBLOCK. For compatibility, emulate blocking semantics by
-      // spinning until the write succeeds. If you don't want spinning,
-      // don't use O_NONBLOCK file descriptors with raw_ostream.
-      if (errno == EINTR || errno == EAGAIN
+    do {
+      size_t ChunkSize = std::min(Size, MaxWriteSize);
+      ssize_t ret = ::write(FD, Ptr, ChunkSize);
+
+      if (ret < 0) {
+        // If it's a recoverable error, swallow it and retry the write.
+        //
+        // Ideally we wouldn't ever see EAGAIN or EWOULDBLOCK here, since
+        // raw_ostream isn't designed to do non-blocking I/O. However, some
+        // programs, such as old versions of bjam, have mistakenly used
+        // O_NONBLOCK. For compatibility, emulate blocking semantics by
+        // spinning until the write succeeds. If you don't want spinning,
+        // don't use O_NONBLOCK file descriptors with raw_ostream.
+        if (errno == EINTR || errno == EAGAIN
 #ifdef EWOULDBLOCK
-          || errno == EWOULDBLOCK
+            || errno == EWOULDBLOCK
 #endif
-          )
-        continue;
+        )
+          continue;
 
-      // Otherwise it's a non-recoverable error. Note it and quit.
-      error_detected(std::error_code(errno, std::generic_category()));
-      break;
-    }
+        // Otherwise it's a non-recoverable error. Note it and quit.
+        error_detected(std::error_code(errno, std::generic_category()));
+        break;
+      }
 
-    // The write may have written some or all of the data. Update the
-    // size and buffer pointer to reflect the remainder that needs
-    // to be written. If there are no bytes left, we're done.
-    Ptr += ret;
-    Size -= ret;
-  } while (Size > 0);
+      // The write may have written some or all of the data. Update the
+      // size and buffer pointer to reflect the remainder that needs
+      // to be written. If there are no bytes left, we're done.
+      Ptr += ret;
+      Size -= ret;
+    } while (Size > 0);
+  }
 }
 
 void raw_fd_ostream::close() {
   assert(ShouldClose);
   ShouldClose = false;
   flush();
+  if (MMapFile)
+    MMapFile.reset();
   if (auto EC = sys::Process::SafelyCloseFileDescriptor(FD))
     error_detected(EC);
   FD = -1;
@@ -803,15 +815,21 @@
 uint64_t raw_fd_ostream::seek(uint64_t off) {
   assert(SupportsSeeking && "Stream does not support seeking!");
   flush();
+  if (MMapFile) {
+    assert(off <= *ReservedSize);
+    pos = off;
+  } else {
 #ifdef _WIN32
-  pos = ::_lseeki64(FD, off, SEEK_SET);
+    pos = ::_lseeki64(FD, off, SEEK_SET);
 #elif defined(HAVE_LSEEK64)
-  pos = ::lseek64(FD, off, SEEK_SET);
+    pos = ::lseek64(FD, off, SEEK_SET);
 #else
-  pos = ::lseek(FD, off, SEEK_SET);
+    pos = ::lseek(FD, off, SEEK_SET);
 #endif
-  if (pos == (uint64_t)-1)
-    error_detected(std::error_code(errno, std::generic_category()));
+    if (pos == (uint64_t)-1)
+      error_detected(std::error_code(errno, std::generic_category()));
+  }
+
   return pos;
 }
 
@@ -879,6 +897,34 @@
 
 void raw_fd_ostream::anchor() {}
 
+void raw_fd_ostream::reserve(uint64_t Size) {
+  if (!ShouldClose || Size == 0)
+    return;
+
+#ifndef _WIN32
+  // On Windows, CreateFileMapping (the mmap function on Windows)
+  // automatically extends the underlying file. We don't need to
+  // extend the file beforehand. _chsize (ftruncate on Windows) is
+  // pretty slow just like it writes specified amount of bytes,
+  // so we should avoid calling that function.
+  if (std::error_code EC = sys::fs::resize_file(FD, Size))
+    return;
+#endif
+
+  // Mmap it.
+  std::error_code EC;
+  MMapFile = std::make_unique<sys::fs::mapped_file_region>(
+      sys::fs::convertFDToNativeFile(FD),
+      sys::fs::mapped_file_region::readwrite, Size, 0, EC);
+
+  if (EC) {
+    MMapFile.reset();
+    return;
+  }
+
+  raw_ostream::reserve(Size);
+}
+
 //===----------------------------------------------------------------------===//
 // outs(), errs(), nulls()
 //===----------------------------------------------------------------------===//
@@ -922,12 +968,22 @@
 
 ssize_t raw_fd_stream::read(char *Ptr, size_t Size) {
   assert(get_fd() >= 0 && "File already closed.");
-  ssize_t Ret = ::read(get_fd(), (void *)Ptr, Size);
-  if (Ret >= 0)
-    inc_pos(Ret);
-  else
-    error_detected(std::error_code(errno, std::generic_category()));
-  return Ret;
+
+  if (MMapFile) {
+    size_t NumBytes = Size;
+    if ((tell() + Size) > *ReservedSize)
+      NumBytes -= tell() + Size - *ReservedSize;
+    memcpy(Ptr, MMapFile->data() + tell(), NumBytes);
+    inc_pos(NumBytes);
+    return NumBytes;
+  } else {
+    ssize_t Ret = ::read(get_fd(), (void *)Ptr, Size);
+    if (Ret >= 0)
+      inc_pos(Ret);
+    else
+      error_detected(std::error_code(errno, std::generic_category()));
+    return Ret;
+  }
 }
 
 bool raw_fd_stream::classof(const raw_ostream *OS) {
@@ -943,6 +999,8 @@
 }
 
 void raw_string_ostream::write_impl(const char *Ptr, size_t Size) {
+  assert((!ReservedSize || OS.size() + Size <= *ReservedSize) &&
+         "Writing over reserved area");
   OS.append(Ptr, Size);
 }
 
@@ -953,6 +1011,8 @@
 uint64_t raw_svector_ostream::current_pos() const { return OS.size(); }
 
 void raw_svector_ostream::write_impl(const char *Ptr, size_t Size) {
+  assert((!ReservedSize || OS.size() + Size <= *ReservedSize) &&
+         "Writing over reserved area");
   OS.append(Ptr, Ptr + Size);
 }
 
diff --git a/llvm/unittests/Support/CMakeLists.txt b/llvm/unittests/Support/CMakeLists.txt --- a/llvm/unittests/Support/CMakeLists.txt +++ b/llvm/unittests/Support/CMakeLists.txt @@ -91,6 +91,7 @@ YAMLParserTest.cpp formatted_raw_ostream_test.cpp raw_fd_stream_test.cpp + raw_fd_ostream_test.cpp 
raw_ostream_test.cpp raw_pwrite_stream_test.cpp raw_sha1_ostream_test.cpp
diff --git a/llvm/unittests/Support/raw_fd_ostream_test.cpp b/llvm/unittests/Support/raw_fd_ostream_test.cpp
new file mode 100644
--- /dev/null
+++ b/llvm/unittests/Support/raw_fd_ostream_test.cpp
@@ -0,0 +1,314 @@
+//===- llvm/unittest/Support/raw_fd_ostream_test.cpp ----------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/FileUtilities.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/raw_ostream.h"
+#include "gtest/gtest.h"
+
+using namespace llvm;
+
+namespace {
+
+static void CheckFileData(StringRef FileName, StringRef GoldenData) {
+  ErrorOr<std::unique_ptr<MemoryBuffer>> BufOrErr =
+      MemoryBuffer::getFileOrSTDIN(FileName);
+  ASSERT_FALSE(BufOrErr.getError());
+
+  ASSERT_EQ((*BufOrErr)->getBufferSize(), GoldenData.size());
+  EXPECT_EQ(memcmp((*BufOrErr)->getBufferStart(), GoldenData.data(),
+                   GoldenData.size()),
+            0);
+}
+
+void WriteDataToTheStream(std::function<void(raw_fd_ostream &)> WriteData,
+                          StringRef GoldenData) {
+  SmallString<64> Path;
+  int FD;
+  ASSERT_FALSE(sys::fs::createTemporaryFile("foo", "bar", FD, Path));
+  FileRemover Cleanup(Path);
+  std::error_code EC;
+
+  raw_fd_ostream ReadWriteAccessStream(Path, EC, sys::fs::CD_CreateAlways,
+                                       sys::fs::FA_Write | sys::fs::FA_Read,
+                                       sys::fs::OF_None);
+  EXPECT_TRUE(!EC);
+
+  WriteData(ReadWriteAccessStream);
+
+  ReadWriteAccessStream.close();
+
+  CheckFileData(Path.c_str(), GoldenData);
+
+  raw_fd_ostream WriteOnlyAccessStream(Path, EC, sys::fs::CD_CreateAlways,
+                                       sys::fs::FA_Write, sys::fs::OF_None);
+  EXPECT_TRUE(!EC);
+
+  WriteData(WriteOnlyAccessStream);
+
+  WriteOnlyAccessStream.close();
+
+  CheckFileData(Path.c_str(), GoldenData);
+}
+
+void WriteWrongDataToTheStream(std::function<void(raw_fd_ostream &)> WriteData,
+                               const char *ErrorDescription) {
+  SmallString<64> Path;
+  int FD;
+  ASSERT_FALSE(sys::fs::createTemporaryFile("foo", "bar", FD, Path));
+  FileRemover Cleanup(Path);
+  std::error_code EC;
+
+  raw_fd_ostream OS(Path, EC, sys::fs::CD_CreateAlways,
+                    sys::fs::FA_Write | sys::fs::FA_Read, sys::fs::OF_None);
+  EXPECT_TRUE(!EC);
+
+  EXPECT_DEATH(WriteData(OS), ErrorDescription);
+}
+
+TEST(raw_fd_ostreamTest, EmptyFile) {
+  WriteDataToTheStream([](raw_fd_ostream &) {}, "");
+
+  WriteDataToTheStream([](raw_fd_ostream &OS) { OS.reserve(0); }, "");
+}
+
+TEST(raw_fd_ostreamTest, OneByteFile) {
+  WriteDataToTheStream([](raw_fd_ostream &OS) { OS << "A"; }, "A");
+
+  WriteDataToTheStream(
+      [](raw_fd_ostream &OS) {
+        OS.reserve(1);
+        OS << "A";
+      },
+      "A");
+}
+
+TEST(raw_fd_ostreamTest, Write) {
+  WriteDataToTheStream(
+      [](raw_fd_ostream &OS) {
+        OS << "0";
+        OS << "1";
+        OS << "2";
+        OS << "3";
+        OS << "4";
+        OS << "5";
+        OS << "6";
+        OS << "7";
+      },
+      "01234567");
+
+  WriteDataToTheStream(
+      [](raw_fd_ostream &OS) {
+        OS.reserve(8);
+        OS << "0";
+        OS << "1";
+        OS << "2";
+        OS << "3";
+        OS << "4";
+        OS << "5";
+        OS << "6";
+        OS << "7";
+      },
+      "01234567");
+}
+
+TEST(raw_fd_ostreamTest, PWrite) {
+  WriteDataToTheStream(
+      [](raw_fd_ostream &OS) {
+        OS << "0";
+        OS.pwrite("7", 1, 0);
+        OS << "1";
+        OS.pwrite("6", 1, 1);
+        OS << "2";
+        OS.pwrite("5", 1, 2);
+        OS << "3";
+        OS.pwrite("4", 1, 3);
+        OS << "4";
+        OS.pwrite("3", 1, 4);
+        OS << "5";
+        OS.pwrite("2", 1, 5);
+        OS << "6";
+        OS.pwrite("1", 1, 6);
+        OS << "7";
+        OS.pwrite("0", 1, 7);
+      },
+      "76543210");
+
+  WriteDataToTheStream(
+      [](raw_fd_ostream &OS) {
+        OS.reserve(8);
+
+        OS << "0";
+        OS.pwrite("7", 1, 0);
+        OS << "1";
+        OS.pwrite("6", 1, 1);
+        OS << "2";
+        OS.pwrite("5", 1, 2);
+        OS << "3";
+        OS.pwrite("4", 1, 3);
+        OS << "4";
+        OS.pwrite("3", 1, 4);
+        OS << "5";
+        OS.pwrite("2", 1, 5);
+        OS << "6";
+        OS.pwrite("1", 1, 6);
+        OS << "7";
+        OS.pwrite("0", 1, 7);
+      },
+      "76543210");
+}
+
+TEST(raw_fd_ostreamTest, PWriteFullReplace) {
+  WriteDataToTheStream(
+      [](raw_fd_ostream &OS) {
+        OS.write("abcd", 4);
+        OS.pwrite("efgh", 4, 0);
+      },
+      "efgh");
+
+  WriteDataToTheStream(
+      [](raw_fd_ostream &OS) {
+        OS.reserve(4);
+        OS.write("abcd", 4);
+        OS.pwrite("efgh", 4, 0);
+      },
+      "efgh");
+}
+
+TEST(raw_fd_ostreamTest, SeekAndWrite) {
+  WriteDataToTheStream(
+      [](raw_fd_ostream &OS) {
+        OS.write("01234567", 8);
+        OS.seek(4);
+        OS.write("xyz", 3);
+        OS.seek(8);
+      },
+      "0123xyz7");
+
+  WriteDataToTheStream(
+      [](raw_fd_ostream &OS) {
+        OS.reserve(8);
+        OS.write("01234567", 8);
+        OS.seek(4);
+        OS.write("xyz", 3);
+        OS.seek(8);
+      },
+      "0123xyz7");
+}
+
+TEST(raw_fd_ostreamTest, Write4K) {
+  const size_t DataSize = 4096;
+  char Data[DataSize];
+  memset(Data, 0xfe, DataSize);
+  WriteDataToTheStream([&](raw_fd_ostream &OS) { OS.write(Data, DataSize); },
+                       StringRef(Data, DataSize));
+
+  WriteDataToTheStream(
+      [&](raw_fd_ostream &OS) {
+        OS.reserve(DataSize);
+        OS.write(Data, DataSize);
+      },
+      StringRef(Data, DataSize));
+}
+
+TEST(raw_fd_ostreamTest, WriteBuffered4K) {
+  const size_t DataSize = 4096;
+  char Data[DataSize];
+  memset(Data, 0xfe, DataSize);
+  WriteDataToTheStream(
+      [&](raw_fd_ostream &OS) {
+        OS.SetBufferSize(128);
+        OS.write(Data, DataSize);
+      },
+      StringRef(Data, DataSize));
+
+  WriteDataToTheStream(
+      [&](raw_fd_ostream &OS) {
+        OS.reserve(DataSize);
+        OS.SetBufferSize(128);
+        OS.write(Data, DataSize);
+      },
+      StringRef(Data, DataSize));
+}
+
+TEST(raw_fd_ostreamTest, ReserveDevNull) {
+  std::error_code EC;
+  raw_fd_ostream OS("/dev/null", EC, sys::fs::CD_CreateAlways,
+                    sys::fs::FA_Write | sys::fs::FA_Read, sys::fs::OF_None);
+  EXPECT_TRUE(!EC);
+
+  OS.reserve(8);
+  OS << "01234567";
+  EXPECT_TRUE(!OS.error());
+}
+
+TEST(raw_fd_ostreamTest, ReserveStdOut) {
+  std::error_code EC;
+  raw_fd_ostream OS("-", EC, sys::fs::CD_CreateAlways,
+                    sys::fs::FA_Write | sys::fs::FA_Read, sys::fs::OF_None);
+  EXPECT_TRUE(!EC);
+
+  OS.reserve(8);
+  OS << "01234567";
+  EXPECT_TRUE(!OS.error());
+}
+
+TEST(raw_fd_ostreamTest, ReserveWithoutWriting) {
+  SmallString<64> Path;
+  int FD;
+  ASSERT_FALSE(sys::fs::createTemporaryFile("foo", "bar", FD, Path));
+  FileRemover Cleanup(Path);
+  std::error_code EC;
+
+  raw_fd_ostream OS(Path, EC, sys::fs::CD_CreateAlways,
+                    sys::fs::FA_Write | sys::fs::FA_Read, sys::fs::OF_None);
+  EXPECT_TRUE(!EC);
+  OS.reserve(1000);
+  OS.close();
+  EXPECT_FALSE(OS.has_error());
+
+  ErrorOr<std::unique_ptr<MemoryBuffer>> BufOrErr =
+      MemoryBuffer::getFileOrSTDIN(Path);
+  ASSERT_FALSE(BufOrErr.getError());
+
+  EXPECT_EQ((*BufOrErr)->getBufferSize(), 1000u);
+}
+
+#ifdef GTEST_HAS_DEATH_TEST
+#ifndef NDEBUG
+TEST(raw_fd_ostreamTest, WriteMoreThanReserved) {
+  WriteWrongDataToTheStream(
+      [&](raw_fd_ostream &OS) {
+        OS.reserve(5);
+        OS.write("123456", 6);
+      },
+      "Writing over reserved area.");
+}
+
+TEST(raw_fd_ostreamTest, ReserveTwice) {
+  WriteWrongDataToTheStream(
+      [&](raw_fd_ostream &OS) {
+        OS.reserve(5);
+        OS.reserve(5);
+      },
+      "Buffer space is already reserved.");
+}
+
+TEST(raw_fd_ostreamTest, ReserveAfterWrite) {
+  WriteWrongDataToTheStream(
+      [&](raw_fd_ostream &OS) {
+        OS << "AAAAA";
+        OS.reserve(5);
+      },
+      "Can`t reserve space if any data is already written.");
+}
+
+#endif
+#endif
+
+} // namespace
diff --git a/llvm/unittests/Support/raw_fd_stream_test.cpp b/llvm/unittests/Support/raw_fd_stream_test.cpp
--- a/llvm/unittests/Support/raw_fd_stream_test.cpp
+++ b/llvm/unittests/Support/raw_fd_stream_test.cpp
@@ -51,6 +51,74 @@
   EXPECT_EQ(Bytes[7], '7');
 }
 
+TEST(raw_fd_streamTest, PreallocatedReadAfterWrite) {
+  SmallString<64> Path;
+  int FD;
+  ASSERT_FALSE(sys::fs::createTemporaryFile("foo", "bar", FD, Path));
+  FileRemover Cleanup(Path);
+  std::error_code EC;
+  raw_fd_stream OS(Path, EC);
+  EXPECT_TRUE(!EC);
+
+  OS.reserve(10);
+
+  char Bytes[10];
+
+  OS.write("01234567", 8);
+
+  OS.seek(3);
+  EXPECT_EQ(OS.read(Bytes, 2), 2);
+  EXPECT_EQ(Bytes[0], '3');
+  EXPECT_EQ(Bytes[1], '4');
+
+  OS.seek(4);
+  OS.write("xyz", 3);
+
+  OS.seek(0);
+  EXPECT_EQ(OS.read(Bytes, 8), 8);
+  EXPECT_EQ(Bytes[0], '0');
+  EXPECT_EQ(Bytes[1], '1');
+  EXPECT_EQ(Bytes[2], '2');
+  EXPECT_EQ(Bytes[3], '3');
+  EXPECT_EQ(Bytes[4], 'x');
+  EXPECT_EQ(Bytes[5], 'y');
+  EXPECT_EQ(Bytes[6], 'z');
+  EXPECT_EQ(Bytes[7], '7');
+
+  OS.seek(8);
+  OS << '8';
+  OS.pwrite("abc", 3, 0);
+  OS << '9';
+  OS.seek(0);
+  EXPECT_EQ(OS.read(Bytes, 10), 10);
+  EXPECT_EQ(Bytes[0], 'a');
+  EXPECT_EQ(Bytes[1], 'b');
+  EXPECT_EQ(Bytes[2], 'c');
+  EXPECT_EQ(Bytes[3], '3');
+  EXPECT_EQ(Bytes[4], 'x');
+  EXPECT_EQ(Bytes[5], 'y');
+  EXPECT_EQ(Bytes[6], 'z');
+  EXPECT_EQ(Bytes[7], '7');
+  EXPECT_EQ(Bytes[8], '8');
+  EXPECT_EQ(Bytes[9], '9');
+
+  // Check seeking to the last position.
+  OS.seek(10);
+
+  std::string DestString;
+  raw_string_ostream Dst(DestString);
+
+  Dst.reserve(10);
+
+  // Check reading from the stream; stop on end of data or on a read error.
+  OS.seek(0);
+  for (auto NumBytes = OS.read(Bytes, 3); NumBytes > 0;
+       NumBytes = OS.read(Bytes, 3))
+    Dst.write(Bytes, NumBytes);
+
+  EXPECT_EQ(DestString, "abc3xyz789");
+}
+
 TEST(raw_fd_streamTest, DynCast) {
   {
     std::error_code EC;