diff --git a/lld/ELF/LTO.cpp b/lld/ELF/LTO.cpp --- a/lld/ELF/LTO.cpp +++ b/lld/ELF/LTO.cpp @@ -57,6 +57,19 @@ return ret; } +// The merged bitcode after LTO is large. Try opening a file stream that +// supports reading, seeking and writing. Such a file allows BitcodeWriter to +// flush buffered data to reduce memory consumption. If this fails, open a file +// stream that supports only writing. +static std::unique_ptr openLTOOutputFile(StringRef file) { + std::error_code ec; + std::unique_ptr fs = + std::make_unique(file, ec); + if (!ec) + return fs; + return openFile(file); +} + static std::string getThinLTOOutputFile(StringRef modulePath) { return lto::getThinLTOOutputFile( std::string(modulePath), std::string(config->thinLTOPrefixReplace.first), @@ -151,7 +164,8 @@ if (config->emitLLVM) { c.PostInternalizeModuleHook = [](size_t task, const Module &m) { - if (std::unique_ptr os = openFile(config->outputFile)) + if (std::unique_ptr os = + openLTOOutputFile(config->outputFile)) WriteBitcodeToFile(m, *os, false); return false; }; diff --git a/llvm/include/llvm/Bitcode/BitcodeWriter.h b/llvm/include/llvm/Bitcode/BitcodeWriter.h --- a/llvm/include/llvm/Bitcode/BitcodeWriter.h +++ b/llvm/include/llvm/Bitcode/BitcodeWriter.h @@ -47,7 +47,7 @@ public: /// Create a BitcodeWriter that writes to Buffer. - BitcodeWriter(SmallVectorImpl &Buffer); + BitcodeWriter(SmallVectorImpl &Buffer, raw_fd_stream *FS = nullptr); ~BitcodeWriter(); diff --git a/llvm/include/llvm/Bitstream/BitstreamWriter.h b/llvm/include/llvm/Bitstream/BitstreamWriter.h --- a/llvm/include/llvm/Bitstream/BitstreamWriter.h +++ b/llvm/include/llvm/Bitstream/BitstreamWriter.h @@ -20,17 +20,23 @@ #include "llvm/ADT/StringRef.h" #include "llvm/Bitstream/BitCodes.h" #include "llvm/Support/Endian.h" +#include "llvm/Support/raw_ostream.h" #include namespace llvm { class BitstreamWriter { + /// Out - The buffer that keeps unflushed bytes. SmallVectorImpl &Out; + /// FS - The file stream that Out flushes to. 
If FS is nullptr, it does not + /// support read or seek, so Out cannot be flushed until all data are written. + raw_fd_stream *FS; + /// CurBit - Always between 0 and 31 inclusive, specifies the next bit to use. unsigned CurBit; - /// CurValue - The current value. Only bits < CurBit are valid. + /// CurValue - The current value. Only bits < CurBit are valid. uint32_t CurValue; /// CurCodeSize - This is the declared size of code values used for the @@ -64,15 +70,19 @@ void WriteByte(unsigned char Value) { Out.push_back(Value); + FlushToFile(); } void WriteWord(unsigned Value) { Value = support::endian::byte_swap(Value); Out.append(reinterpret_cast(&Value), reinterpret_cast(&Value + 1)); + FlushToFile(); } - size_t GetBufferOffset() const { return Out.size(); } + uint64_t GetNumOfFlushedBytes() const { return FS ? FS->tell() : 0; } + + size_t GetBufferOffset() const { return Out.size() + GetNumOfFlushedBytes(); } size_t GetWordIndex() const { size_t Offset = GetBufferOffset(); @@ -80,9 +90,23 @@ return Offset / 4; } + /// If the related file stream supports reading, seeking and writing, flush + /// the buffer once its size reaches a threshold. 
+ void FlushToFile() { + constexpr uint64_t FlushThreshold = 1UL << 29; // 512 MiB + if (!FS) { + return; + } + if (Out.size() < FlushThreshold) { + return; + } + FS->write((char *)&Out.front(), Out.size()); + Out.clear(); + } + public: - explicit BitstreamWriter(SmallVectorImpl &O) - : Out(O), CurBit(0), CurValue(0), CurCodeSize(2) {} + BitstreamWriter(SmallVectorImpl &O, raw_fd_stream *FS = nullptr) + : Out(O), FS(FS), CurBit(0), CurValue(0), CurCodeSize(2) {} ~BitstreamWriter() { assert(CurBit == 0 && "Unflushed data remaining"); @@ -104,11 +128,55 @@ void BackpatchWord(uint64_t BitNo, unsigned NewWord) { using namespace llvm::support; uint64_t ByteNo = BitNo / 8; + uint64_t StartBit = BitNo & 7; + uint64_t NumOfFlushedBytes = GetNumOfFlushedBytes(); + + if (ByteNo >= NumOfFlushedBytes) { + assert((!endian::readAtBitAlignment( + &Out[ByteNo - NumOfFlushedBytes], StartBit)) && + "Expected to be patching over 0-value placeholders"); + endian::writeAtBitAlignment( + &Out[ByteNo - NumOfFlushedBytes], NewWord, StartBit); + return; + } + + // If the byte offset to backpatch is flushed, use seek to backfill data. + // First, save the file position to restore later. + uint64_t CurPos = FS->tell(); + + // Copy data to update into Bytes from the file FS and the buffer Out. + char Bytes[8]; + size_t BytesNum = StartBit ? 8 : 4; + size_t BytesFromDisk = std::min(BytesNum, NumOfFlushedBytes - ByteNo); + size_t BytesFromBuffer = BytesNum - BytesFromDisk; + +#ifdef NDEBUG + if (StartBit) +#endif + { + FS->seek(ByteNo); + ssize_t BytesRead = FS->read(Bytes, BytesFromDisk); + assert(BytesRead >= 0 && static_cast(BytesRead) == BytesFromDisk); + for (size_t i = 0; i < BytesFromBuffer; ++i) + Bytes[BytesFromDisk + i] = Out[i]; + } + + // Update Bytes in terms of bit offset and value. 
assert((!endian::readAtBitAlignment( - &Out[ByteNo], BitNo & 7)) && + Bytes, StartBit)) && "Expected to be patching over 0-value placeholders"); - endian::writeAtBitAlignment( - &Out[ByteNo], NewWord, BitNo & 7); + endian::writeAtBitAlignment(Bytes, NewWord, + StartBit); + + // Copy updated data back to the file FS and the buffer Out. + FS->seek(ByteNo); + FS->write(Bytes, BytesFromDisk); + for (size_t i = 0; i < BytesFromBuffer; ++i) { + Out[i] = Bytes[BytesFromDisk + i]; + } + + // Restore the file position. + FS->seek(CurPos); } void BackpatchWord64(uint64_t BitNo, uint64_t Val) { diff --git a/llvm/include/llvm/Support/raw_ostream.h b/llvm/include/llvm/Support/raw_ostream.h --- a/llvm/include/llvm/Support/raw_ostream.h +++ b/llvm/include/llvm/Support/raw_ostream.h @@ -47,7 +47,17 @@ /// buffered disciplines etc. It is a simple buffer that outputs /// a chunk at a time. class raw_ostream { +public: + // Class kinds to support LLVM-style RTTI. + enum class OStreamKind { + OK_OStream, + OK_FDStream, + OK_LastFDStream, + }; + private: + OStreamKind Kind; + /// The buffer is handled in such a way that the buffer is /// uninitialized, unbuffered, or out of space when OutBufCur >= /// OutBufEnd. Thus a single comparison suffices to determine if we @@ -105,9 +115,10 @@ static constexpr Colors SAVEDCOLOR = Colors::SAVEDCOLOR; static constexpr Colors RESET = Colors::RESET; - explicit raw_ostream(bool unbuffered = false) - : BufferMode(unbuffered ? BufferKind::Unbuffered - : BufferKind::InternalBuffer) { + explicit raw_ostream(bool unbuffered = false, + OStreamKind K = OStreamKind::OK_OStream) + : Kind(K), BufferMode(unbuffered ? BufferKind::Unbuffered + : BufferKind::InternalBuffer) { // Start out ready to flush. OutBufStart = OutBufEnd = OutBufCur = nullptr; } @@ -120,6 +131,8 @@ /// tell - Return the current offset with the file. 
uint64_t tell() const { return current_pos() + GetNumBytesInBuffer(); } + OStreamKind get_kind() const { return Kind; } + //===--------------------------------------------------------------------===// // Configuration Interface //===--------------------------------------------------------------------===// @@ -388,8 +401,9 @@ void anchor() override; public: - explicit raw_pwrite_stream(bool Unbuffered = false) - : raw_ostream(Unbuffered) {} + explicit raw_pwrite_stream(bool Unbuffered = false, + OStreamKind K = OStreamKind::OK_OStream) + : raw_ostream(Unbuffered, K) {} void pwrite(const char *Ptr, size_t Size, uint64_t Offset) { #ifndef NDEBUG uint64_t Pos = tell(); @@ -436,10 +450,17 @@ /// Determine an efficient buffer size. size_t preferred_buffer_size() const override; + void anchor() override; + +protected: /// Set the flag indicating that an output error has been encountered. void error_detected(std::error_code EC) { this->EC = EC; } - void anchor() override; + /// Return the file descriptor. + int get_fd() const { return FD; } + + // Advance the file position by \p Delta. + void inc_pos(uint64_t Delta) { pos += Delta; } public: /// Open the specified file for writing. If an error occurs, information @@ -464,7 +485,8 @@ /// FD is the file descriptor that this writes to. If ShouldClose is true, /// this closes the file when the stream is destroyed. If FD is for stdout or /// stderr, it will not be closed. - raw_fd_ostream(int fd, bool shouldClose, bool unbuffered=false); + raw_fd_ostream(int fd, bool shouldClose, bool unbuffered = false, + OStreamKind K = OStreamKind::OK_OStream); ~raw_fd_ostream() override; @@ -548,6 +570,34 @@ /// This returns a reference to a raw_ostream which simply discards output. 
raw_ostream &nulls(); +//===----------------------------------------------------------------------===// +// File Streams +//===----------------------------------------------------------------------===// + +/// A raw_ostream of a file for reading/writing/seeking. +/// +class raw_fd_stream : public raw_fd_ostream { +public: + /// Open the specified file for reading/writing/seeking. If an error occurs, + /// information about the error is put into EC, and the stream should be + /// immediately destroyed. + raw_fd_stream(StringRef Filename, std::error_code &EC); + + /// This reads up to \p Size bytes into the buffer pointed to by \p Ptr. + /// + /// \param Ptr The start of the buffer to hold data to be read. + /// + /// \param Size The number of bytes to be read. + /// + /// On success, the number of bytes read is returned, and the file position is + /// advanced by this number. On error, -1 is returned; use error() to get the + /// error code. + ssize_t read(char *Ptr, size_t Size); + + /// Check if \p OS is a pointer of type raw_fd_stream*. 
+ static bool classof(const raw_ostream *OS); +}; + //===----------------------------------------------------------------------===// // Output Stream Adaptors //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp --- a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp +++ b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp @@ -4453,8 +4453,8 @@ Stream.Emit(0xD, 4); } -BitcodeWriter::BitcodeWriter(SmallVectorImpl &Buffer) - : Buffer(Buffer), Stream(new BitstreamWriter(Buffer)) { +BitcodeWriter::BitcodeWriter(SmallVectorImpl &Buffer, raw_fd_stream *FS) + : Buffer(Buffer), Stream(new BitstreamWriter(Buffer, FS)) { writeBitcodeHeader(*Stream); } @@ -4565,7 +4565,7 @@ if (TT.isOSDarwin() || TT.isOSBinFormatMachO()) Buffer.insert(Buffer.begin(), BWH_HeaderSize, 0); - BitcodeWriter Writer(Buffer); + BitcodeWriter Writer(Buffer, dyn_cast(&Out)); Writer.writeModule(M, ShouldPreserveUseListOrder, Index, GenerateHash, ModHash); Writer.writeSymtab(); @@ -4575,7 +4575,9 @@ emitDarwinBCHeaderAndTrailer(Buffer, TT); // Write the generated bitstream to "Out". - Out.write((char*)&Buffer.front(), Buffer.size()); + if (!Buffer.empty()) { + Out.write((char *)&Buffer.front(), Buffer.size()); + } } void IndexBitcodeWriter::write() { diff --git a/llvm/lib/Support/raw_ostream.cpp b/llvm/lib/Support/raw_ostream.cpp --- a/llvm/lib/Support/raw_ostream.cpp +++ b/llvm/lib/Support/raw_ostream.cpp @@ -620,8 +620,9 @@ /// FD is the file descriptor that this writes to. If ShouldClose is true, this /// closes the file when the stream is destroyed. 
-raw_fd_ostream::raw_fd_ostream(int fd, bool shouldClose, bool unbuffered) - : raw_pwrite_stream(unbuffered), FD(fd), ShouldClose(shouldClose) { +raw_fd_ostream::raw_fd_ostream(int fd, bool shouldClose, bool unbuffered, + OStreamKind K) + : raw_pwrite_stream(unbuffered, K), FD(fd), ShouldClose(shouldClose) { if (FD < 0 ) { ShouldClose = false; return; @@ -904,6 +905,37 @@ return S; } +//===----------------------------------------------------------------------===// +// File Streams +//===----------------------------------------------------------------------===// + +raw_fd_stream::raw_fd_stream(StringRef Filename, std::error_code &EC) + : raw_fd_ostream(getFD(Filename, EC, sys::fs::CD_CreateAlways, + sys::fs::FA_Write | sys::fs::FA_Read, + sys::fs::OF_None), + true, false, OStreamKind::OK_FDStream) { + if (EC) + return; + + // Do not support non-seekable files. + if (!supportsSeeking()) + EC = std::make_error_code(std::errc::invalid_argument); +} + +ssize_t raw_fd_stream::read(char *Ptr, size_t Size) { + assert(get_fd() >= 0 && "File already closed."); + ssize_t Ret = ::read(get_fd(), (void*)Ptr, Size); + if (Ret >= 0) + inc_pos(Ret); + else + error_detected(std::error_code(errno, std::generic_category())); + return Ret; +} + +bool raw_fd_stream::classof(const raw_ostream *OS) { + return OS->get_kind() == OStreamKind::OK_FDStream; +} + //===----------------------------------------------------------------------===// // raw_string_ostream //===----------------------------------------------------------------------===// diff --git a/llvm/unittests/Support/raw_fd_stream_test.cpp b/llvm/unittests/Support/raw_fd_stream_test.cpp new file mode 100644 --- /dev/null +++ b/llvm/unittests/Support/raw_fd_stream_test.cpp @@ -0,0 +1,67 @@ +//===- llvm/unittest/Support/raw_fd_stream_test.cpp - raw_fd_stream tests -===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/SmallString.h" +#include "llvm/Config/llvm-config.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/FileSystem.h" +#include "llvm/Support/FileUtilities.h" +#include "llvm/Support/raw_ostream.h" +#include "gtest/gtest.h" + +using namespace llvm; + +namespace { + +TEST(raw_fd_streamTest, ReadAfterWrite) { + SmallString<64> Path; + int FD; + ASSERT_FALSE(sys::fs::createTemporaryFile("foo", "bar", FD, Path)); + FileRemover Cleanup(Path); + std::error_code EC; + raw_fd_stream OS(Path, EC); + EXPECT_TRUE(!EC); + + char Bytes[8]; + + OS.write("01234567", 8); + + OS.seek(3); + EXPECT_EQ(OS.read(Bytes, 2), 2); + EXPECT_EQ(Bytes[0], '3'); + EXPECT_EQ(Bytes[1], '4'); + + OS.seek(4); + OS.write("xyz", 3); + + OS.seek(0); + EXPECT_EQ(OS.read(Bytes, 8), 8); + EXPECT_EQ(Bytes[0], '0'); + EXPECT_EQ(Bytes[1], '1'); + EXPECT_EQ(Bytes[2], '2'); + EXPECT_EQ(Bytes[3], '3'); + EXPECT_EQ(Bytes[4], 'x'); + EXPECT_EQ(Bytes[5], 'y'); + EXPECT_EQ(Bytes[6], 'z'); + EXPECT_EQ(Bytes[7], '7'); +} + +TEST(raw_fd_streamTest, DynCast) { + { + std::error_code EC; + raw_fd_stream OS("-", EC); + EXPECT_TRUE(dyn_cast(&OS)); + } + { + std::error_code EC; + raw_fd_ostream OS("-", EC); + EXPECT_FALSE(dyn_cast(&OS)); + } +} + +}