Index: llvm/include/llvm/Bitcode/BitstreamReader.h =================================================================== --- llvm/include/llvm/Bitcode/BitstreamReader.h +++ llvm/include/llvm/Bitcode/BitstreamReader.h @@ -159,8 +159,7 @@ bool canSkipToPos(size_t pos) const { // pos can be skipped to if it is a valid address or one byte past the end. - return pos == 0 || - R->getBitcodeBytes().isValidAddress(static_cast<uint64_t>(pos - 1)); + return R->getBitcodeBytes().tryExtendTo(pos); } bool AtEndOfStream() { @@ -218,7 +217,9 @@ /// Get a pointer into the bitstream at the specified byte offset. const uint8_t *getPointerToByte(uint64_t ByteNo, uint64_t NumBytes) { - return R->getBitcodeBytes().getPointer(ByteNo, NumBytes); + bool Result = R->getBitcodeBytes().tryExtendTo(ByteNo + NumBytes); + assert(Result); + return R->getBitcodeBytes().getBuffer().data() + ByteNo; } /// Get a pointer into the bitstream at the specified bit offset. @@ -234,22 +235,31 @@ report_fatal_error("Unexpected end of file"); // Read the next word from the stream. - uint8_t Array[sizeof(word_t)] = {0}; - - uint64_t BytesRead = - R->getBitcodeBytes().readBytes(Array, sizeof(Array), NextChar); + R->getBitcodeBytes().tryExtendTo(NextChar + sizeof(word_t)); + ArrayRef<uint8_t> Buf = R->getBitcodeBytes().getBuffer(); // If we run out of data, stop at the end of the stream. - if (BytesRead == 0) { + if (Buf.size() <= NextChar) { CurWord = 0; BitsInCurWord = 0; - Size = NextChar; + this->Size = Buf.size(); return; } - CurWord = - support::endian::read<word_t, support::little, support::unaligned>( - Array); + const uint8_t *NextCharPtr = Buf.data() + NextChar; + unsigned BytesRead; + if (Buf.size() >= NextChar + sizeof(word_t)) { + BytesRead = sizeof(word_t); + CurWord = + support::endian::read<word_t, support::little, support::unaligned>( + NextCharPtr); + } else { + // Short read. + BytesRead = Buf.size() - NextChar; + CurWord = 0; + for (unsigned B = 0; B != BytesRead; ++B) + CurWord |= NextCharPtr[B] << (B * 8); + } NextChar += BytesRead; BitsInCurWord = BytesRead * 8; } Index: llvm/include/llvm/Support/MemoryObject.h =================================================================== --- llvm/include/llvm/Support/MemoryObject.h +++ llvm/include/llvm/Support/MemoryObject.h @@ -10,57 +10,59 @@ #ifndef LLVM_SUPPORT_MEMORYOBJECT_H #define LLVM_SUPPORT_MEMORYOBJECT_H +#include "llvm/ADT/ArrayRef.h" #include "llvm/Support/DataTypes.h" namespace llvm { /// Interface to data which might be streamed. Streamability has 2 important /// implications/restrictions. First, the data might not yet exist in memory -/// when the request is made. This just means that readByte/readBytes might have +/// when the request is made. This just means that tryExtendTo() might have /// to block or do some work to get it. More significantly, the exact size of /// the object might not be known until it has all been fetched. This means that /// to return the right result, getExtent must also wait for all the data to /// arrive; therefore it should not be called on objects which are actually /// streamed (this would defeat the purpose of streaming). Instead, -/// isValidAddress can be used to test addresses without knowing the exact size -/// of the stream. Finally, getPointer can be used instead of readBytes to avoid -/// extra copying. +/// tryExtendTo() can be used to test addresses without knowing the exact size +/// of the stream. class MemoryObject { +protected: + /// The data read so far. Derived classes should update this field in response + /// to calls to tryExtendToImpl(). + ArrayRef<uint8_t> Buf; + + /// Derived classes should implement tryExtendTo() in this function. + virtual bool tryExtendToImpl(uint64_t Size) = 0; + public: virtual ~MemoryObject(); - /// Returns the size of the region in bytes. (The region is contiguous, so - /// the highest valid address of the region is getExtent() - 1). - /// - /// @result - The size of the region. - virtual uint64_t getExtent() const = 0; + /// Returns a reference to a buffer containing all of the data read so far. + ArrayRef<uint8_t> getBuffer() const { + return Buf; + } - /// Tries to read a contiguous range of bytes from the region, up to the end - /// of the region. + /// Try to extend the buffer held by this MemoryObject to at least Size + /// bytes. Calling this function invalidates any buffer previously + /// returned by getBuffer(). /// - /// @param Buf - A pointer to a buffer to be filled in. Must be non-NULL - /// and large enough to hold size bytes. - /// @param Size - The number of bytes to copy. - /// @param Address - The address of the first byte, in the same space as - /// getBase(). - /// @result - The number of bytes read. - virtual uint64_t readBytes(uint8_t *Buf, uint64_t Size, - uint64_t Address) const = 0; + /// @result True if successful. + bool tryExtendTo(uint64_t Size) { + // First see if we already have the data. + if (Buf.size() >= Size) + return true; + return tryExtendToImpl(Size); + } - /// Ensures that the requested data is in memory, and returns a pointer to it. - /// More efficient than using readBytes if the data is already in memory. May - /// block until (address - base + size) bytes have been read - /// @param address - address of the byte, in the same space as getBase() - /// @param size - amount of data that must be available on return - /// @result - valid pointer to the requested data - virtual const uint8_t *getPointer(uint64_t address, uint64_t size) const = 0; - - /// Returns true if the address is within the object (i.e. between base and - /// base + extent - 1 inclusive). May block until (address - base) bytes have - /// been read - /// @param address - address of the byte, in the same space as getBase() - /// @result - true if the address may be read with readByte() - virtual bool isValidAddress(uint64_t address) const = 0; + /// Returns the size of the region in bytes. (The region is contiguous, so + /// the highest valid address of the region is getExtent() - 1). Calling this + /// function invalidates any buffer previously returned by getBuffer(). + /// + /// @result - The size of the region. + uint64_t getExtent() { + tryExtendTo(-1ull); + return Buf.size(); + } }; } Index: llvm/include/llvm/Support/StreamingMemoryObject.h =================================================================== --- llvm/include/llvm/Support/StreamingMemoryObject.h +++ llvm/include/llvm/Support/StreamingMemoryObject.h @@ -23,13 +23,11 @@ /// addition to inherited members, it has the dropLeadingBytes and /// setKnownObjectSize methods which are not applicable to non-streamed objects. class StreamingMemoryObject : public MemoryObject { +protected: + bool tryExtendToImpl(uint64_t Size) override; + public: StreamingMemoryObject(std::unique_ptr<DataStreamer> Streamer); - uint64_t getExtent() const override; - uint64_t readBytes(uint8_t *Buf, uint64_t Size, - uint64_t Address) const override; - const uint8_t *getPointer(uint64_t Address, uint64_t Size) const override; - bool isValidAddress(uint64_t address) const override; /// Drop s bytes from the front of the stream, pushing the positions of the /// remaining bytes down by s. This is used to skip past the bitcode header, @@ -53,27 +51,9 @@ mutable size_t ObjectSize; // 0 if unknown, set if wrapper seen or EOF reached mutable bool EOFReached; - // Fetch enough bytes such that Pos can be read (i.e. BytesRead > - // Pos). Returns true if Pos can be read. Unlike most of the - // functions in BitcodeReader, returns true on success. Most of the - // requests will be small, but we fetch at kChunkSize bytes at a - // time to avoid making too many potentially expensive GetBytes - // calls. - bool fetchToPos(size_t Pos) const { - while (Pos >= BytesRead) { - if (EOFReached) - return false; - Bytes.resize(BytesRead + BytesSkipped + kChunkSize); - size_t bytes = Streamer->GetBytes(&Bytes[BytesRead + BytesSkipped], - kChunkSize); - BytesRead += bytes; - if (bytes == 0) { // reached EOF/ran out of bytes - if (ObjectSize == 0) - ObjectSize = BytesRead; - EOFReached = true; - } - } - return !ObjectSize || Pos < ObjectSize; + /// The implementation should call this after updating the buffer. + void setBuf() { + Buf = ArrayRef<uint8_t>(Bytes.data() + BytesSkipped, BytesRead); } StreamingMemoryObject(const StreamingMemoryObject&) = delete; Index: llvm/lib/Bitcode/Reader/BitcodeReader.cpp =================================================================== --- llvm/lib/Bitcode/Reader/BitcodeReader.cpp +++ llvm/lib/Bitcode/Reader/BitcodeReader.cpp @@ -283,10 +283,10 @@ StreamFile = llvm::make_unique<BitstreamReader>(std::move(OwnedBytes)); Stream.init(&*StreamFile); - unsigned char buf[16]; - if (Bytes.readBytes(buf, 16, 0) != 16) + if (!Bytes.tryExtendTo(16)) return error("Invalid bitcode signature"); + const uint8_t *buf = Bytes.getBuffer().data(); if (!isBitcode(buf, buf + 16)) return error("Invalid bitcode signature"); Index: llvm/lib/Support/StreamingMemoryObject.cpp =================================================================== --- llvm/lib/Support/StreamingMemoryObject.cpp +++ llvm/lib/Support/StreamingMemoryObject.cpp @@ -16,110 +16,55 @@ namespace { class RawMemoryObject : public MemoryObject { -public: - RawMemoryObject(const unsigned char *Start, const unsigned char *End) : - FirstChar(Start), LastChar(End) { - assert(LastChar >= FirstChar && "Invalid start/end range"); - } - - uint64_t getExtent() const override { - return LastChar - FirstChar; - } - uint64_t readBytes(uint8_t *Buf, uint64_t Size, - uint64_t Address) const override; - const uint8_t *getPointer(uint64_t address, uint64_t size) const override; - bool isValidAddress(uint64_t address) const override { - return validAddress(address); - } +protected: + // We already have the entire buffer. + bool tryExtendToImpl(uint64_t Size) override { return false; } -private: - const uint8_t* const FirstChar; - const uint8_t* const LastChar; - - // These are implemented as inline functions here to avoid multiple virtual - // calls per public function - bool validAddress(uint64_t address) const { - return static_cast<std::ptrdiff_t>(address) < LastChar - FirstChar; +public: + RawMemoryObject(const unsigned char *Start, const unsigned char *End) { + assert(End >= Start && "Invalid start/end range"); + Buf = ArrayRef<uint8_t>(Start, End - Start); } RawMemoryObject(const RawMemoryObject&) = delete; void operator=(const RawMemoryObject&) = delete; }; - -uint64_t RawMemoryObject::readBytes(uint8_t *Buf, uint64_t Size, - uint64_t Address) const { - uint64_t BufferSize = LastChar - FirstChar; - if (Address >= BufferSize) - return 0; - - uint64_t End = Address + Size; - if (End > BufferSize) - End = BufferSize; - - assert(static_cast<int64_t>(End - Address) >= 0); - Size = End - Address; - memcpy(Buf, Address + FirstChar, Size); - return Size; -} - -const uint8_t *RawMemoryObject::getPointer(uint64_t address, - uint64_t size) const { - return FirstChar + address; -} } // anonymous namespace namespace llvm { -// If the bitcode has a header, then its size is known, and we don't have to -// block until we actually want to read it. -bool StreamingMemoryObject::isValidAddress(uint64_t address) const { - if (ObjectSize && address < ObjectSize) return true; - return fetchToPos(address); -} - -uint64_t StreamingMemoryObject::getExtent() const { - if (ObjectSize) return ObjectSize; - size_t pos = BytesRead + kChunkSize; - // keep fetching until we run out of bytes - while (fetchToPos(pos)) pos += kChunkSize; - return ObjectSize; -} - -uint64_t StreamingMemoryObject::readBytes(uint8_t *Buf, uint64_t Size, - uint64_t Address) const { - fetchToPos(Address + Size - 1); - // Note: For wrapped bitcode files will set ObjectSize after the - // first call to fetchToPos. In such cases, ObjectSize can be - // smaller than BytesRead. - size_t MaxAddress = - (ObjectSize && ObjectSize < BytesRead) ? ObjectSize : BytesRead; - if (Address >= MaxAddress) - return 0; - uint64_t End = Address + Size; - if (End > MaxAddress) - End = MaxAddress; - assert(End >= Address); - Size = End - Address; - memcpy(Buf, &Bytes[Address + BytesSkipped], Size); - return Size; -} - -const uint8_t *StreamingMemoryObject::getPointer(uint64_t Address, - uint64_t Size) const { - fetchToPos(Address + Size - 1); - return &Bytes[Address + BytesSkipped]; +// Try to extend the buffer held by this MemoryObject to at least Size bytes. +// Returns true if sucessful. Most of the requests will be small, but we fetch at +// kChunkSize bytes at a time to avoid making too many potentially expensive +// GetBytes calls. +bool StreamingMemoryObject::tryExtendToImpl(size_t Size) { + while (Size > BytesRead && !EOFReached) { + Bytes.resize(BytesRead + BytesSkipped + kChunkSize); + size_t bytes = Streamer->GetBytes(&Bytes[BytesRead + BytesSkipped], + kChunkSize); + BytesRead += bytes; + if (bytes == 0) { // reached EOF/ran out of bytes + if (ObjectSize == 0) + ObjectSize = BytesRead; + EOFReached = true; + } + } + setBuf(); + return !EOFReached; } bool StreamingMemoryObject::dropLeadingBytes(size_t s) { if (BytesRead < s) return true; BytesSkipped = s; BytesRead -= s; + setBuf(); return false; } void StreamingMemoryObject::setKnownObjectSize(size_t size) { ObjectSize = size; Bytes.reserve(size); + setBuf(); if (ObjectSize <= BytesRead) EOFReached = true; } @@ -134,5 +79,6 @@ : Bytes(kChunkSize), Streamer(std::move(Streamer)), BytesRead(0), BytesSkipped(0), ObjectSize(0), EOFReached(false) { BytesRead = this->Streamer->GetBytes(&Bytes[0], kChunkSize); + setBuf(); } } Index: llvm/unittests/Support/StreamingMemoryObjectTest.cpp =================================================================== --- llvm/unittests/Support/StreamingMemoryObjectTest.cpp +++ llvm/unittests/Support/StreamingMemoryObjectTest.cpp @@ -39,30 +39,27 @@ } }; -TEST(StreamingMemoryObjectTest, isValidAddress) { +TEST(StreamingMemoryObjectTest, tryExtendTo) { auto DS = make_unique<NullDataStreamer>(); StreamingMemoryObject O(std::move(DS)); - EXPECT_TRUE(O.isValidAddress(32 * 1024)); + EXPECT_TRUE(O.tryExtendTo(32 * 1024)); } TEST(StreamingMemoryObjectTest, setKnownObjectSize) { auto DS = make_unique<NullDataStreamer>(); StreamingMemoryObject O(std::move(DS)); - uint8_t Buf[32]; - EXPECT_EQ(16u, O.readBytes(Buf, 16, 0)); - O.setKnownObjectSize(24); - EXPECT_EQ(8u, O.readBytes(Buf, 16, 16)); + EXPECT_TRUE(O.tryExtendTo(StreamingMemoryObject::kChunkSize * 2)); + O.setKnownObjectSize(StreamingMemoryObject::kChunkSize * 2); + EXPECT_FALSE(O.tryExtendTo(StreamingMemoryObject::kChunkSize * 3)); + EXPECT_EQ(StreamingMemoryObject::kChunkSize * 2, O.getBuffer().size()); } -TEST(StreamingMemoryObjectTest, getPointer) { +TEST(StreamingMemoryObjectTest, getBuffer) { uint8_t InputBuffer[] = {0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07}; StreamingMemoryObject O(make_unique<BufferStreamer>(StringRef( reinterpret_cast<const char *>(InputBuffer), sizeof(InputBuffer)))); - EXPECT_TRUE(std::equal(InputBuffer + 1, InputBuffer + 2, O.getPointer(1, 2))); - EXPECT_TRUE(std::equal(InputBuffer + 3, InputBuffer + 7, O.getPointer(3, 4))); - EXPECT_TRUE(std::equal(InputBuffer + 4, InputBuffer + 8, O.getPointer(4, 5))); - EXPECT_TRUE(std::equal(InputBuffer, InputBuffer + 8, O.getPointer(0, 20))); + EXPECT_TRUE(std::equal(InputBuffer, InputBuffer + 8, O.getBuffer().data())); } } // end namespace