Skip to content

Commit 028eb5a

Browse files
committedNov 2, 2016
Bitcode: Change reader interface to take memory buffers.
As proposed on llvm-dev: http://lists.llvm.org/pipermail/llvm-dev/2016-October/106595.html This change also fixes an API oddity where BitstreamCursor::Read() would return zero for the first read past the end of the bitstream, but would report_fatal_error for subsequent reads. Now we always report_fatal_error for all reads past the end. Updated clients to check for the end of the bitstream before reading from it. I also needed to add padding to the invalid bitcode tests in test/Bitcode/. This is because the streaming interface was not checking that the file size is a multiple of 4. Differential Revision: https://reviews.llvm.org/D26219 llvm-svn: 285773
1 parent ce898db commit 028eb5a

20 files changed

+91
-381
lines changed
 

‎clang/lib/CodeGen/ObjectFilePCHContainerOperations.cpp

+3-4
Original file line numberDiff line numberDiff line change
@@ -325,17 +325,16 @@ void ObjectFilePCHContainerReader::ExtractPCH(
325325
if ((!IsCOFF && Name == "__clangast") || (IsCOFF && Name == "clangast")) {
326326
StringRef Buf;
327327
Section.getContents(Buf);
328-
return StreamFile.init((const unsigned char *)Buf.begin(),
329-
(const unsigned char *)Buf.end());
328+
StreamFile = llvm::BitstreamReader(Buf);
329+
return;
330330
}
331331
}
332332
}
333333
handleAllErrors(OFOrErr.takeError(), [&](const llvm::ErrorInfoBase &EIB) {
334334
if (EIB.convertToErrorCode() ==
335335
llvm::object::object_error::invalid_file_type)
336336
// As a fallback, treat the buffer as a raw AST.
337-
StreamFile.init((const unsigned char *)Buffer.getBufferStart(),
338-
(const unsigned char *)Buffer.getBufferEnd());
337+
StreamFile = llvm::BitstreamReader(Buffer);
339338
else
340339
EIB.log(llvm::errs());
341340
});

‎clang/lib/Frontend/PCHContainerOperations.cpp

+1-2
Original file line numberDiff line numberDiff line change
@@ -60,8 +60,7 @@ std::unique_ptr<ASTConsumer> RawPCHContainerWriter::CreatePCHContainerGenerator(
6060

6161
void RawPCHContainerReader::ExtractPCH(
6262
llvm::MemoryBufferRef Buffer, llvm::BitstreamReader &StreamFile) const {
63-
StreamFile.init((const unsigned char *)Buffer.getBufferStart(),
64-
(const unsigned char *)Buffer.getBufferEnd());
63+
StreamFile = llvm::BitstreamReader(Buffer);
6564
}
6665

6766
PCHContainerOperations::PCHContainerOperations() {

‎clang/lib/Frontend/SerializedDiagnosticReader.cpp

+1-4
Original file line numberDiff line numberDiff line change
@@ -24,10 +24,7 @@ std::error_code SerializedDiagnosticReader::readDiagnostics(StringRef File) {
2424
if (!Buffer)
2525
return SDError::CouldNotLoad;
2626

27-
llvm::BitstreamReader StreamFile;
28-
StreamFile.init((const unsigned char *)(*Buffer)->getBufferStart(),
29-
(const unsigned char *)(*Buffer)->getBufferEnd());
30-
27+
llvm::BitstreamReader StreamFile(**Buffer);
3128
llvm::BitstreamCursor Stream(StreamFile);
3229

3330
// Sniff for the signature.

‎clang/lib/Serialization/ASTReader.cpp

+2-1
Original file line numberDiff line numberDiff line change
@@ -3802,7 +3802,8 @@ static ASTFileSignature readASTFileSignature(llvm::BitstreamReader &StreamFile);
38023802

38033803
/// \brief Whether \p Stream starts with the AST/PCH file magic number 'CPCH'.
38043804
static bool startsWithASTFileMagic(BitstreamCursor &Stream) {
3805-
return Stream.Read(8) == 'C' &&
3805+
return Stream.canSkipToPos(4) &&
3806+
Stream.Read(8) == 'C' &&
38063807
Stream.Read(8) == 'P' &&
38073808
Stream.Read(8) == 'C' &&
38083809
Stream.Read(8) == 'H';

‎clang/lib/Serialization/GlobalModuleIndex.cpp

+1-2
Original file line numberDiff line numberDiff line change
@@ -246,8 +246,7 @@ GlobalModuleIndex::readIndex(StringRef Path) {
246246
std::unique_ptr<llvm::MemoryBuffer> Buffer = std::move(BufferOrErr.get());
247247

248248
/// \brief The bitstream reader from which we'll read the AST file.
249-
llvm::BitstreamReader Reader((const unsigned char *)Buffer->getBufferStart(),
250-
(const unsigned char *)Buffer->getBufferEnd());
249+
llvm::BitstreamReader Reader(*Buffer);
251250

252251
/// \brief The main bitstream cursor for the main block.
253252
llvm::BitstreamCursor Cursor(Reader);

‎llvm/include/llvm/Bitcode/BitstreamReader.h

+36-73
Original file line numberDiff line numberDiff line change
@@ -15,13 +15,14 @@
1515
#ifndef LLVM_BITCODE_BITSTREAMREADER_H
1616
#define LLVM_BITCODE_BITSTREAMREADER_H
1717

18+
#include "llvm/ADT/ArrayRef.h"
1819
#include "llvm/ADT/IntrusiveRefCntPtr.h"
1920
#include "llvm/ADT/SmallVector.h"
2021
#include "llvm/Bitcode/BitCodes.h"
2122
#include "llvm/Support/Endian.h"
2223
#include "llvm/Support/ErrorHandling.h"
2324
#include "llvm/Support/MathExtras.h"
24-
#include "llvm/Support/StreamingMemoryObject.h"
25+
#include "llvm/Support/MemoryBuffer.h"
2526
#include <algorithm>
2627
#include <cassert>
2728
#include <climits>
@@ -50,32 +51,25 @@ class BitstreamReader {
5051
};
5152

5253
private:
53-
std::unique_ptr<MemoryObject> BitcodeBytes;
54+
ArrayRef<uint8_t> BitcodeBytes;
5455

5556
std::vector<BlockInfo> BlockInfoRecords;
5657

5758
/// This is set to true if we don't care about the block/record name
5859
/// information in the BlockInfo block. Only llvm-bcanalyzer uses this.
59-
bool IgnoreBlockInfoNames;
60+
bool IgnoreBlockInfoNames = true;
6061

6162
public:
62-
BitstreamReader() : IgnoreBlockInfoNames(true) {
63-
}
64-
65-
BitstreamReader(const unsigned char *Start, const unsigned char *End)
66-
: IgnoreBlockInfoNames(true) {
67-
init(Start, End);
68-
}
69-
70-
BitstreamReader(std::unique_ptr<MemoryObject> BitcodeBytes)
71-
: BitcodeBytes(std::move(BitcodeBytes)), IgnoreBlockInfoNames(true) {}
72-
73-
void init(const unsigned char *Start, const unsigned char *End) {
74-
assert(((End-Start) & 3) == 0 &&"Bitcode stream not a multiple of 4 bytes");
75-
BitcodeBytes.reset(getNonStreamedMemoryObject(Start, End));
76-
}
63+
BitstreamReader() = default;
64+
BitstreamReader(ArrayRef<uint8_t> BitcodeBytes)
65+
: BitcodeBytes(BitcodeBytes) {}
66+
BitstreamReader(StringRef BitcodeBytes)
67+
: BitcodeBytes(reinterpret_cast<const uint8_t *>(BitcodeBytes.data()),
68+
BitcodeBytes.size()) {}
69+
BitstreamReader(MemoryBufferRef BitcodeBytes)
70+
: BitstreamReader(BitcodeBytes.getBuffer()) {}
7771

78-
MemoryObject &getBitcodeBytes() { return *BitcodeBytes; }
72+
ArrayRef<uint8_t> getBitcodeBytes() { return BitcodeBytes; }
7973

8074
/// This is called by clients that want block/record name information.
8175
void CollectBlockInfoNames() { IgnoreBlockInfoNames = false; }
@@ -131,9 +125,6 @@ class SimpleBitstreamCursor {
131125
BitstreamReader *R = nullptr;
132126
size_t NextChar = 0;
133127

134-
// The size of the bicode. 0 if we don't know it yet.
135-
size_t Size = 0;
136-
137128
public:
138129
/// This is the current data we have pulled from the stream but have not
139130
/// returned to the client. This is specifically and intentionally defined to
@@ -159,17 +150,11 @@ class SimpleBitstreamCursor {
159150

160151
bool canSkipToPos(size_t pos) const {
161152
// pos can be skipped to if it is a valid address or one byte past the end.
162-
return pos == 0 ||
163-
R->getBitcodeBytes().isValidAddress(static_cast<uint64_t>(pos - 1));
153+
return pos <= R->getBitcodeBytes().size();
164154
}
165155

166156
bool AtEndOfStream() {
167-
if (BitsInCurWord != 0)
168-
return false;
169-
if (Size != 0)
170-
return Size <= NextChar;
171-
fillCurWord();
172-
return BitsInCurWord == 0;
157+
return BitsInCurWord == 0 && R->getBitcodeBytes().size() <= NextChar;
173158
}
174159

175160
/// Return the bit # of the bit we are reading.
@@ -218,7 +203,7 @@ class SimpleBitstreamCursor {
218203

219204
/// Get a pointer into the bitstream at the specified byte offset.
220205
const uint8_t *getPointerToByte(uint64_t ByteNo, uint64_t NumBytes) {
221-
return R->getBitcodeBytes().getPointer(ByteNo, NumBytes);
206+
return R->getBitcodeBytes().data() + ByteNo;
222207
}
223208

224209
/// Get a pointer into the bitstream at the specified bit offset.
@@ -230,26 +215,25 @@ class SimpleBitstreamCursor {
230215
}
231216

232217
void fillCurWord() {
233-
if (Size != 0 && NextChar >= Size)
218+
ArrayRef<uint8_t> Buf = R->getBitcodeBytes();
219+
if (NextChar >= Buf.size())
234220
report_fatal_error("Unexpected end of file");
235221

236222
// Read the next word from the stream.
237-
uint8_t Array[sizeof(word_t)] = {0};
238-
239-
uint64_t BytesRead =
240-
R->getBitcodeBytes().readBytes(Array, sizeof(Array), NextChar);
241-
242-
// If we run out of data, stop at the end of the stream.
243-
if (BytesRead == 0) {
223+
const uint8_t *NextCharPtr = Buf.data() + NextChar;
224+
unsigned BytesRead;
225+
if (Buf.size() >= NextChar + sizeof(word_t)) {
226+
BytesRead = sizeof(word_t);
227+
CurWord =
228+
support::endian::read<word_t, support::little, support::unaligned>(
229+
NextCharPtr);
230+
} else {
231+
// Short read.
232+
BytesRead = Buf.size() - NextChar;
244233
CurWord = 0;
245-
BitsInCurWord = 0;
246-
Size = NextChar;
247-
return;
234+
for (unsigned B = 0; B != BytesRead; ++B)
235+
CurWord |= NextCharPtr[B] << (B * 8);
248236
}
249-
250-
CurWord =
251-
support::endian::read<word_t, support::little, support::unaligned>(
252-
Array);
253237
NextChar += BytesRead;
254238
BitsInCurWord = BytesRead * 8;
255239
}
@@ -278,9 +262,9 @@ class SimpleBitstreamCursor {
278262

279263
fillCurWord();
280264

281-
// If we run out of data, stop at the end of the stream.
265+
// If we run out of data, abort.
282266
if (BitsLeft > BitsInCurWord)
283-
return 0;
267+
report_fatal_error("Unexpected end of file");
284268

285269
word_t R2 = CurWord & (~word_t(0) >> (BitsInWord - BitsLeft));
286270

@@ -346,31 +330,7 @@ class SimpleBitstreamCursor {
346330
}
347331

348332
/// Skip to the end of the file.
349-
void skipToEnd() { NextChar = R->getBitcodeBytes().getExtent(); }
350-
351-
/// Prevent the cursor from reading past a byte boundary.
352-
///
353-
/// Prevent the cursor from requesting byte reads past \c Limit. This is
354-
/// useful when working with a cursor on a StreamingMemoryObject, when it's
355-
/// desirable to avoid invalidating the result of getPointerToByte().
356-
///
357-
/// If \c Limit is on a word boundary, AtEndOfStream() will return true if
358-
/// the cursor position reaches or exceeds \c Limit, regardless of the true
359-
/// number of available bytes. Otherwise, AtEndOfStream() returns true when
360-
/// it reaches or exceeds the next word boundary.
361-
void setArtificialByteLimit(uint64_t Limit) {
362-
assert(getCurrentByteNo() < Limit && "Move cursor before lowering limit");
363-
364-
// Round to word boundary.
365-
Limit = alignTo(Limit, sizeof(word_t));
366-
367-
// Only change size if the new one is lower.
368-
if (!Size || Size > Limit)
369-
Size = Limit;
370-
}
371-
372-
/// Return the Size, if known.
373-
uint64_t getSizeIfKnown() const { return Size; }
333+
void skipToEnd() { NextChar = R->getBitcodeBytes().size(); }
374334
};
375335

376336
/// When advancing through a bitstream cursor, each advance can discover a few
@@ -470,6 +430,9 @@ class BitstreamCursor : SimpleBitstreamCursor {
470430
/// Advance the current bitstream, returning the next entry in the stream.
471431
BitstreamEntry advance(unsigned Flags = 0) {
472432
while (true) {
433+
if (AtEndOfStream())
434+
return BitstreamEntry::getError();
435+
473436
unsigned Code = ReadCode();
474437
if (Code == bitc::END_BLOCK) {
475438
// Pop the end of the block unless Flags tells us not to.

‎llvm/include/llvm/Bitcode/ReaderWriter.h

-8
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,6 @@
2424

2525
namespace llvm {
2626
class BitstreamWriter;
27-
class DataStreamer;
2827
class LLVMContext;
2928
class Module;
3029
class ModulePass;
@@ -47,13 +46,6 @@ namespace llvm {
4746
LLVMContext &Context,
4847
bool ShouldLazyLoadMetadata = false);
4948

50-
/// Read the header of the specified stream and prepare for lazy
51-
/// deserialization and streaming of function bodies.
52-
ErrorOr<std::unique_ptr<Module>>
53-
getStreamedBitcodeModule(StringRef Name,
54-
std::unique_ptr<DataStreamer> Streamer,
55-
LLVMContext &Context);
56-
5749
/// Read the header of the specified bitcode buffer and extract just the
5850
/// triple information. If successful, this returns a string. On error, this
5951
/// returns "".

0 commit comments

Comments
 (0)
Please sign in to comment.