Index: include/llvm/Bitcode/BitcodeConvert.h =================================================================== --- /dev/null +++ include/llvm/Bitcode/BitcodeConvert.h @@ -0,0 +1,163 @@ +//===- BitcodeConvert.h - Convert format of bitcode files -------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Models three different forms of (assembled) bitcode files: binary, textual, +// and simplified. +// +// The binary form of an assembled bitcode file is the form generated when you +// assemble bitcode using llvm-as. The textual form is a human readable form of +// the contents of the assembled bitcode (and not the preassembled text of the +// corresponding LLVM instructions). +// +// The textual and simplified forms of bitcode files are modeled as a list of +// bitcode records. Each bitcode record is a list of (unsigned) 64-bit integer +// values. In the textual form, each line contains a bitcode record, and the +// bitcode record is terminated with a semicolon. Bitcode record values are +// separated by commas. +// +// The simplified form is like the textual form, except that the bitcode records +// are encoded using binary values rather than (just) printable characters. +// +// It should be noted that the major problem with the binary form of bitcode +// records is that each value inserted into the bitcode file has a variable +// length (bit) width that depends on the value stored. The encoding of the +// length of the value is part of the written value. Hence, most mutations of +// the saved value are likely to cause the bitstream reader to get lost. +// +// Also, some bitcode records (such as enter block) are context sensitive in +// that they contain a position index to the end of the corresponding bitcode +// block (in the bitcode file). 
+// +// As a result, almost any bit-twiddling will generate a bitcode file that is +// not readable by the binary bitstream reader. This is the major reason that +// the binary bitcode form is not conducive to mutations (i.e. fuzzing). +// +// The goal of textual bitcode records is to provide a simple API to the +// contents of bitcode files that is human readable, easy to mutate, and easy to +// write test cases for. To do this, the textual form removes the bitcode header. +// It also removes all notions of abbreviations, and their corresponding +// bit-encoded forms. Rather, just the raw data within the bitcode records is +// written. +// +// To model block enter/exits as a bitcode record, special symbolic constants +// have been defined. The enter block gets a single parameter, the block ID, +// and the block exit gets no parameters. +// +// If comments are allowed, the comment begins with a semicolon. Any text after +// a semicolon, to the end of the line is considered a comment. Empty lines are +// also allowed. +// +// The simplified form of bitcode records is like the textual form, except that +// the bitcode records are written out using a single, simple binary encoding. +// Like the textual form, the bitcode header is removed, as well as +// abbreviations. +// +// This makes the simplified form conducive to fuzzing tools that do not +// implement tokenized mutations. +// +// The simplified form converts the sequence of bitcode records to a sequence of +// bytes. Each value is written out using the minimal number of bytes to capture +// the corresponding 'sign-rotated' value, followed by a special ACCEPT_VALUE +// byte. Each record is followed by a special ACCEPT_RECORD byte. If the end of +// the bitcode file doesn't contain a (necessary) ACCEPT_VALUE or ACCEPT_RECORD, +// they are automatically inserted. +// +// A sign-rotated value is generated by shifting left the absolute value of the +// integer, and moving the sign to the least significant bit. 
+//
+// There are three special byte values used in the simplified form,
+// corresponding to an action to take. They are:
+//
+// ACCEPT_VALUE: Accept the value defined by the bytes between this action,
+// and the previous action.
+//
+// ACCEPT_RECORD: Accept the record defined by the sequence of values between
+// this action, and the previous ACCEPT_RECORD action.
+//
+// ESCAPE: Treat the next byte as input rather than an action.
+//
+// Note: The main advantage of the textual and simplified forms is that bit
+// twiddling mutations can be applied to them without garbling the rest of the
+// input (this is the major problem with the binary form of bitcode files).
+// Rather, just the surrounding values/records are affected by a mutation.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_BITCODE_BITCODECONVERT_H
+#define LLVM_BITCODE_BITCODECONVERT_H
+
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/MemoryBuffer.h"
+
+#include <cstdint>
+#include <memory>
+#include <string>
+#include <system_error>
+#include <vector>
+
+namespace llvm {
+
+namespace bcconv {
+
+// Codes to represent block enter/exit in bitcode records. They occupy the
+// record-code slot (element 0) of a bitcode record.
+enum SpecialBlockCodes { BLK_CODE_ENTER = 65535, BLK_CODE_EXIT = 65534 };
+
+// Byte buffer that the writers append their output to.
+typedef SmallVectorImpl<char> OutputBuffer;
+// A single bitcode record: the record code followed by its operand values.
+typedef std::vector<uint64_t> BitcodeRecord;
+// An owned sequence of bitcode records, in file order.
+typedef std::vector<std::unique_ptr<BitcodeRecord>> BitcodeRecordList;
+
+/// Holds (programmable) actions for the simplified form of the bitcode
+/// reader/writer.
+class SimplifiedBitcodeActions {
+  // Copying is disabled.
+  SimplifiedBitcodeActions(const SimplifiedBitcodeActions &) = delete;
+  void operator=(const SimplifiedBitcodeActions &) = delete;
+
+public:
+  // Built-in byte value for each action; the values are defined out of line.
+  static const uint8_t AcceptValueDefault;
+  static const uint8_t AcceptRecordDefault;
+  static const uint8_t AcceptEscapeDefault;
+
+  // Shared instance whose action bytes are left at the defaults above.
+  static const SimplifiedBitcodeActions Default;
+
+  SimplifiedBitcodeActions();
+
+  // Byte that ends the value currently being accumulated.
+  uint8_t AcceptValue = AcceptValueDefault;
+  // Byte that ends the record currently being accumulated.
+  uint8_t AcceptRecord = AcceptRecordDefault;
+  // Byte that causes the following byte to be taken as data.
+  uint8_t AcceptEscape = AcceptEscapeDefault;
+};
+
+/// Parses Input as a binary bitcode file and adds the records found to
+/// Records. Returns a non-zero error code when the input cannot be parsed.
+std::error_code readBinaryBitcode(StringRef Input, BitcodeRecordList &Records);
+
+/// Parses Input as textual bitcode and adds the records found to Records.
+/// Setting AllowComments relaxes the rules so that empty lines, and comment
+/// lines beginning with a semicolon, are accepted.
+std::error_code readTextualBitcode(StringRef Input, BitcodeRecordList &Records,
+                                   bool AllowComments = false);
+
+/// Parses Input as simplified bitcode and adds the records found to Records.
+/// Actions supplies the byte values that are treated as actions.
+std::error_code readSimplifiedBitcode(StringRef Input,
+                                      BitcodeRecordList &Records,
+                                      const SimplifiedBitcodeActions &Actions);
+
+/// Appends the binary form of Records to Output. Diagnostics are written to
+/// ErrorMessages. With ErrorRecover set, recoverable problems are repaired
+/// instead of stopping the write. Returns true on success.
+bool writeBinaryBitcode(OutputBuffer &Output, const BitcodeRecordList &Records,
+                        std::string &ErrorMessages, bool ErrorRecover = false);
+
+/// Writes the textual representation for Records into Output.
+bool writeTextualBitcode(OutputBuffer &Output, const BitcodeRecordList &Records,
+                         std::string &ErrorMessages, bool ErrorRecover = false);
+
+/// Writes the simplified representation for Records into Output.
+bool writeSimplifiedBitcode(OutputBuffer &Output,
+                            const BitcodeRecordList &Records,
+                            const SimplifiedBitcodeActions &Actions,
+                            std::string &ErrorMessages,
+                            bool ErrorRecover = false);
+
+} // end of namespace llvm::bcconv
+} // end of namespace llvm
+
+#endif // LLVM_BITCODE_BITCODECONVERT_H
Index: lib/Bitcode/CMakeLists.txt
===================================================================
--- lib/Bitcode/CMakeLists.txt
+++ lib/Bitcode/CMakeLists.txt
@@ -1,2 +1,3 @@
+add_subdirectory(Convert)
 add_subdirectory(Reader)
 add_subdirectory(Writer)
Index: lib/Bitcode/Convert/BinaryBitcodeReader.cpp
===================================================================
--- /dev/null
+++ lib/Bitcode/Convert/BinaryBitcodeReader.cpp
@@ -0,0 +1,183 @@
+//===- BinaryBitcodeReader.cpp - Reads binary bitcode ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Implements the binary bitcode reader that builds the corresponding list of
+// bitcode records.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Bitcode/BitcodeConvert.h"
+
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Bitcode/BitstreamReader.h"
+#include "llvm/Bitcode/ReaderWriter.h"
+
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+using namespace llvm::bcconv;
+
+namespace {
+
+// Defines reader error codes. Values start at 1 so that 0 keeps its
+// std::error_code meaning of "no error".
+enum ReaderErrorType {
+  MalformedBlock = 1,
+  MalformedIRFile,
+  RecordAtTopLevel,
+  InvalidBitcodeSignature
+};
+
+// Defines the corresponding error messages.
+class ReaderErrorCategoryType : public std::error_category {
+  ReaderErrorCategoryType(const ReaderErrorCategoryType &) = delete;
+  void operator=(const ReaderErrorCategoryType &) = delete;
+
+public:
+  // Returns the single instance of this category.
+  static const ReaderErrorCategoryType &get() { return Sentinel; }
+
+private:
+  static const ReaderErrorCategoryType Sentinel;
+  ReaderErrorCategoryType() {}
+  const char *name() const LLVM_NOEXCEPT override {
+    return "Binary Bitcode Convert Reader";
+  }
+  // Maps a ReaderErrorType value (passed as an int) to its message.
+  std::string message(int IndexError) const override {
+    switch (static_cast<ReaderErrorType>(IndexError)) {
+    case MalformedBlock:
+      return "Malformed block found in bitcode";
+    case MalformedIRFile:
+      return "Malformed IR file";
+    case RecordAtTopLevel:
+      return "Record not allowed outside block";
+    case InvalidBitcodeSignature:
+      return "Bitcode has invalid bitcode signature";
+    }
+    llvm_unreachable("Unknown error type!");
+  }
+  ~ReaderErrorCategoryType() override = default;
+};
+
+const ReaderErrorCategoryType ReaderErrorCategoryType::Sentinel;
+
+// Wraps Error in a std::error_code that belongs to this reader's category.
+std::error_code error(ReaderErrorType Error) {
+  return std::error_code(Error, ReaderErrorCategoryType::get());
+}
+
+// Parses binary bitcode records.
+class BinaryRecordParser {
+  BinaryRecordParser(const BinaryRecordParser &) = delete;
+  void operator=(const BinaryRecordParser &) = delete;
+
+public:
+  // Input is the binary bitcode to parse; parsed records are added to Records.
+  BinaryRecordParser(StringRef Input, BitcodeRecordList &Records)
+      : Input(Input), Records(Records) {}
+
+  /// Read in a module block.
+  std::error_code readModule();
+
+private:
+  // Held by value: a StringRef is only a pointer/length pair.
+  StringRef Input;
+  BitcodeRecordList &Records;
+  std::unique_ptr<BitstreamReader> Reader;
+  std::unique_ptr<BitstreamCursor> Cursor;
+
+  // Read block with ID.
+  std::error_code readBlock(unsigned ID);
+};
+
+// Checks the optional wrapper and the bitcode signature, then reads the first
+// top-level block (including nested blocks) into Records. Only that first
+// top-level entry is consumed, and it must be a block.
+std::error_code BinaryRecordParser::readModule() {
+  const unsigned char *BufBegin =
+      reinterpret_cast<const unsigned char *>(Input.begin());
+  const unsigned char *BufEnd =
+      reinterpret_cast<const unsigned char *>(Input.end());
+  if (isBitcodeWrapper(BufBegin, BufEnd)) {
+    constexpr bool VerifyBufferSize = true;
+    if (SkipBitcodeWrapperHeader(BufBegin, BufEnd, VerifyBufferSize))
+      return error(MalformedIRFile);
+  }
+
+  Reader = make_unique<BitstreamReader>(BufBegin, BufEnd);
+  Cursor = make_unique<BitstreamCursor>(*Reader);
+
+  // Sniff for the signature.
+  if (Cursor->Read(8) != 'B' || Cursor->Read(8) != 'C' ||
+      Cursor->Read(4) != 0x0 || Cursor->Read(4) != 0xC ||
+      Cursor->Read(4) != 0xE || Cursor->Read(4) != 0xD)
+    return error(InvalidBitcodeSignature);
+
+  if (Cursor->AtEndOfStream())
+    return error(MalformedIRFile);
+
+  BitstreamEntry Entry =
+      Cursor->advance(BitstreamCursor::AF_DontAutoprocessAbbrevs);
+
+  if (Entry.Kind != BitstreamEntry::SubBlock)
+    return error(MalformedIRFile);
+  return readBlock(Entry.ID);
+}
+
+// Emits an enter-block record for BlockID, then one record per entry in the
+// block (recursing into nested blocks), and finally an exit-block record.
+// BLOCKINFO blocks are consumed by the cursor and produce no records.
+std::error_code BinaryRecordParser::readBlock(unsigned BlockID) {
+  std::unique_ptr<BitcodeRecord> EnterRecord = make_unique<BitcodeRecord>();
+  EnterRecord->push_back(BLK_CODE_ENTER);
+  EnterRecord->push_back(BlockID);
+  Records.push_back(std::move(EnterRecord));
+  // EnterSubBlock signals failure by returning true; reading on from a cursor
+  // that failed to enter the block would produce garbage.
+  if (Cursor->EnterSubBlock(BlockID))
+    return error(MalformedBlock);
+
+  SmallVector<uint64_t, 64> Values;
+  while (1) {
+    BitstreamEntry Entry = Cursor->advance();
+    switch (Entry.Kind) {
+    case BitstreamEntry::Error:
+      return error(MalformedBlock);
+    case BitstreamEntry::EndBlock: {
+      std::unique_ptr<BitcodeRecord> ExitRecord = make_unique<BitcodeRecord>();
+      ExitRecord->push_back(BLK_CODE_EXIT);
+      Records.push_back(std::move(ExitRecord));
+      return std::error_code();
+    }
+    case BitstreamEntry::SubBlock: {
+      if (Entry.ID == bitc::BLOCKINFO_BLOCK_ID) {
+        if (Cursor->ReadBlockInfoBlock())
+          return error(MalformedBlock);
+        continue;
+      }
+      if (std::error_code EC = readBlock(Entry.ID))
+        return EC;
+      continue;
+    }
+    case BitstreamEntry::Record: {
+      unsigned Code = Cursor->readRecord(Entry.ID, Values);
+      std::unique_ptr<BitcodeRecord> Record = make_unique<BitcodeRecord>();
+      Record->reserve(Values.size() + 1);
+      Record->push_back(Code);
+      Record->insert(Record->end(), Values.begin(), Values.end());
+      Records.push_back(std::move(Record));
+      // Values is reused for the next record.
+      Values.clear();
+      continue;
+    }
+    }
+  }
+  llvm_unreachable("Should not reach here!");
+}
+
+} // end of anonymous namespace
+
+namespace llvm {
+namespace bcconv {
+
+std::error_code readBinaryBitcode(StringRef Input, BitcodeRecordList &Records) {
+  BinaryRecordParser Parser(Input, Records);
+  return Parser.readModule();
+}
+
+} // end of namespace llvm::bcconv
+} // end of namespace llvm
Index: lib/Bitcode/Convert/BinaryBitcodeWriter.cpp
===================================================================
--- /dev/null
+++ lib/Bitcode/Convert/BinaryBitcodeWriter.cpp
@@ -0,0 +1,105 @@
+//===- BinaryBitcodeWriter.cpp - Writes binary bitcode from records -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Implements the binary bitcode writer when given bitcode records.
+//
+//===----------------------------------------------------------------------===//
+
+#include "BitcodeWriter.h"
+
+#include "llvm/Bitcode/BitstreamWriter.h"
+#include <climits>
+
+using namespace llvm;
+using namespace llvm::bcconv;
+
+namespace {
+
+// Writer of binary bitcode records to a byte buffer.
+class BinaryRecordWriter : public BitcodeWriter {
+  BinaryRecordWriter(const BinaryRecordWriter &) = delete;
+  void operator=(const BinaryRecordWriter &) = delete;
+
+public:
+  // Output goes to Buffer; the bitcode signature is written immediately.
+  BinaryRecordWriter(OutputBuffer &Buffer, std::string &ErrorMessages,
+                     bool ErrorRecover)
+      : BitcodeWriter(ErrorMessages, ErrorRecover), Writer(Buffer) {
+    writeHeader();
+  }
+
+  ~BinaryRecordWriter() final = default;
+
+private:
+  BitstreamWriter Writer;
+
+  // Emits the 32-bit signature: 'B', 'C', then the nibbles 0x0 0xC 0xE 0xD.
+  void writeHeader() {
+    Writer.Emit((unsigned)'B', 8);
+    Writer.Emit((unsigned)'C', 8);
+    Writer.Emit(0x0, 4);
+    Writer.Emit(0xC, 4);
+    Writer.Emit(0xE, 4);
+    Writer.Emit(0xD, 4);
+  }
+
+  void writeEnterBlock(uint64_t ID) final {
+    BitcodeWriter::writeEnterBlock(ID);
+    // No abbreviations are written, so a 2-bit abbreviation ID width is used.
+    Writer.EnterSubblock(ID, 2);
+  }
+
+  void writeExitBlock() final {
+    BitcodeWriter::writeExitBlock();
+    Writer.ExitBlock();
+  }
+
+  // Record[0] is the record code; the remaining elements are its operands.
+  // The caller (emitRecord) guarantees Record is not empty.
+  void writeRecord(const BitcodeRecord &Record) final {
+    unsigned Code = Record[0];
+    SmallVector<uint64_t, 64> Values(Record.begin() + 1, Record.end());
+    Writer.EmitRecord(Code, Values);
+  }
+
+  bool finish() final;
+};
+
+// Closes any blocks still open and pads the stream to a 32-bit boundary.
+// Returns false if an error was found that could not be (or was not allowed to
+// be) repaired.
+bool BinaryRecordWriter::finish() {
+  bool Success = true;
+  // First try to finish gracefully.
+  if (!BitcodeWriter::finish())
+    Success = false;
+
+  // Force out exit blocks, if needed, to avoid the bitstream writer from assert
+  // failing.
+  while (!atOutermostScope())
+    writeExitBlock();
+
+  // Make sure the file length is divisible by 4 bytes. The number of bits past
+  // the last word boundary is the bit position modulo 32; the padding needed is
+  // whatever remains of that word.
+  const uint64_t BitsPerWord = 4 * CHAR_BIT;
+  uint64_t TrailingBits = Writer.GetCurrentBitNo() % BitsPerWord;
+  if (TrailingBits) {
+    recoverableError() << "Generated bitstream not word aligned\n";
+    Writer.Emit(0, static_cast<unsigned>(BitsPerWord - TrailingBits));
+    if (!ErrorRecover)
+      return false;
+  }
+  return Success;
+}
+
+} // end of anonymous namespace
+
+namespace llvm {
+namespace bcconv {
+
+bool writeBinaryBitcode(OutputBuffer &Buffer, const BitcodeRecordList &Records,
+                        std::string &ErrorMessages, bool ErrorRecover) {
+  BinaryRecordWriter RecordWriter(Buffer, ErrorMessages, ErrorRecover);
+  return RecordWriter.writeBitcodeRecords(Records);
+}
+
+} // end of namespace llvm::bcconv
+} // end of namespace llvm
Index: lib/Bitcode/Convert/BitcodeWriter.h
===================================================================
--- /dev/null
+++ lib/Bitcode/Convert/BitcodeWriter.h
@@ -0,0 +1,89 @@
+//===- BitcodeWriter.h - Driver for bitcode writers -------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Defines the driver class for writing bitcode records into a character buffer.
+// Used to write to binary, textual, and simplified forms.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_BITCODE_CONVERT_BITCODEWRITER_H
+#define LLVM_LIB_BITCODE_CONVERT_BITCODEWRITER_H
+
+#include "llvm/Bitcode/BitcodeConvert.h"
+#include "llvm/Support/raw_ostream.h"
+
+#include <limits>
+#include <string>
+
+namespace llvm {
+
+class raw_ostream;
+
+namespace bcconv {
+
+// Abstract bitcode record writer. Walks a list of records, tracks block
+// nesting, reports (and optionally repairs) structural errors, and delegates
+// the actual output to the virtual write* methods.
+class BitcodeWriter {
+  BitcodeWriter(const BitcodeWriter &) = delete;
+  void operator=(const BitcodeWriter &) = delete;
+
+public:
+  virtual ~BitcodeWriter();
+
+  // Writes out Records. Returns true if no errors were found, or if every
+  // error found was repaired under error recovery.
+  bool writeBitcodeRecords(const BitcodeRecordList &Records);
+
+protected:
+  // The block ID associated with records not in any block.
+  static const unsigned UnknownBlockID = std::numeric_limits<unsigned>::max();
+  // The caller's string that receives error messages (via ErrStream).
+  std::string &ErrorMessages;
+  // Stream used to write error messages into ErrorMessages.
+  raw_string_ostream ErrStream;
+  // True if error recovery should be applied.
+  bool ErrorRecover;
+  // Nest level of blocks within bitcode being written.
+  size_t BlockDepth = 0;
+  // The number of errors found.
+  size_t NumErrors = 0;
+  // The number of error repairs applied.
+  size_t NumRepairs = 0;
+
+  explicit BitcodeWriter(std::string &ErrorMessages, bool ErrorRecover)
+      : ErrorMessages(ErrorMessages), ErrStream(ErrorMessages),
+        ErrorRecover(ErrorRecover) {}
+
+  // Records that an error has occurred, and returns stream to print the
+  // corresponding error message to.
+  raw_ostream &error();
+
+  // Records that a recoverable error has occurred, and returns the stream to
+  // print the corresponding error message to.
+  raw_ostream &recoverableError();
+
+  // True if there were no errors, or recovery is on and every error counted
+  // was also counted as repaired.
+  bool isSuccessful() const {
+    return NumErrors == 0 || (ErrorRecover && NumErrors == NumRepairs);
+  }
+
+  bool atOutermostScope() { return BlockDepth == 0; }
+
+  // Emit the contents of Record. Returns false if writing should stop.
+  bool emitRecord(const BitcodeRecord &Record);
+
+  // Write out the enter record for block ID. Overrides must call this base
+  // version so that the nesting depth stays correct.
+  virtual void writeEnterBlock(uint64_t ID) {
+    (void)ID;
+    ++BlockDepth;
+  }
+
+  // Write out the contents of the Record in the current block.
+  virtual void writeRecord(const BitcodeRecord &Record) = 0;
+
+  // Write out the exit record for the current block. Overrides must call this
+  // base version so that the nesting depth stays correct.
+  virtual void writeExitBlock() { --BlockDepth; }
+
+  // Apply any necessary cleanups after writing bitcode.
+  virtual bool finish();
+};
+
+} // end of namespace llvm::bcconv
+} // end of namespace llvm
+
+#endif // LLVM_LIB_BITCODE_CONVERT_BITCODEWRITER_H
Index: lib/Bitcode/Convert/BitcodeWriter.cpp
===================================================================
--- /dev/null
+++ lib/Bitcode/Convert/BitcodeWriter.cpp
@@ -0,0 +1,109 @@
+//===- BitcodeWriter.cpp - Driver for bitcode writers ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Implements the driver class for writing bitcode records into a character
+// buffer. Used to write to binary, textual, and simplified forms.
+//
+//===----------------------------------------------------------------------===//
+
+#include "BitcodeWriter.h"
+
+#include "llvm/Bitcode/BitCodes.h"
+#include "llvm/Support/raw_ostream.h"
+
+namespace llvm {
+namespace bcconv {
+
+BitcodeWriter::~BitcodeWriter() {
+  // Force error messages into field ErrorMessages.
+  ErrStream.flush();
+}
+
+raw_ostream &BitcodeWriter::error() {
+  ++NumErrors;
+  return ErrStream;
+}
+
+raw_ostream &BitcodeWriter::recoverableError() {
+  // The repair is counted only when recovery is enabled, so isSuccessful()
+  // reports failure when it is not.
+  if (ErrorRecover)
+    ++NumRepairs;
+  return error();
+}
+
+// Reports blocks left open at the end of the records and, under error
+// recovery, closes them.
+bool BitcodeWriter::finish() {
+  if (atOutermostScope())
+    return true;
+
+  recoverableError() << "Missing " << BlockDepth << " close blocks.\n";
+  if (!ErrorRecover)
+    return false;
+
+  while (!atOutermostScope())
+    writeExitBlock();
+  return true;
+}
+
+bool BitcodeWriter::writeBitcodeRecords(const BitcodeRecordList &Records) {
+  for (const std::unique_ptr<BitcodeRecord> &Rec : Records) {
+    if (!emitRecord(*Rec))
+      return false;
+  }
+  if (!finish())
+    return false;
+  return isSuccessful();
+}
+
+// Dispatches on the record code: block enter, block exit, or ordinary record.
+// Structural problems are reported and, under error recovery, repaired.
+bool BitcodeWriter::emitRecord(const BitcodeRecord &Record) {
+  size_t NumValues = Record.size();
+  if (NumValues == 0) {
+    // The repair is to drop the empty record.
+    recoverableError() << "Empty records not allowed in bitcode\n";
+    return ErrorRecover;
+  }
+  switch (Record[0]) {
+  case BLK_CODE_ENTER: {
+    uint64_t WriteBlockID = UnknownBlockID;
+    if (NumValues != 2) {
+      recoverableError() << "Block enter record not size 2\n";
+      if (!ErrorRecover)
+        return false;
+    }
+    // Under recovery, a missing ID falls back to UnknownBlockID and any extra
+    // values are ignored.
+    if (NumValues > 1)
+      WriteBlockID = Record[1];
+    if (WriteBlockID == bitc::BLOCKINFO_BLOCK_ID) {
+      recoverableError() << "BlockInfo block not allowed in bitcode records\n";
+      if (!ErrorRecover)
+        return false;
+      WriteBlockID = UnknownBlockID;
+    }
+    writeEnterBlock(WriteBlockID);
+    return true;
+  }
+  case BLK_CODE_EXIT: {
+    if (atOutermostScope()) {
+      // The repair is to drop the unmatched exit.
+      recoverableError() << "Extraneous exit block\n";
+      return ErrorRecover;
+    }
+    writeExitBlock();
+    return true;
+  }
+  default: {
+    if (atOutermostScope()) {
+      recoverableError() << "Record not allowed outside block\n";
+      if (!ErrorRecover)
+        return false;
+      // The repair is to wrap the record in a block with an unknown ID.
+      writeEnterBlock(UnknownBlockID);
+    }
+    writeRecord(Record);
+    return true;
+  }
+  }
+}
+
+} // end of namespace llvm::bcconv
+} // end of namespace llvm
Index: lib/Bitcode/Convert/CMakeLists.txt
=================================================================== --- /dev/null +++ lib/Bitcode/Convert/CMakeLists.txt @@ -0,0 +1,13 @@ +add_llvm_library(LLVMBitcodeConv + BinaryBitcodeReader.cpp + BinaryBitcodeWriter.cpp + BitcodeWriter.cpp + SimplifiedBitcodeReader.cpp + SimplifiedBitcodeWriter.cpp + TextualBitcodeReader.cpp + TextualBitcodeWriter.cpp + + DEPENDS + intrinsics_gen + LLVMBitReader + ) Index: lib/Bitcode/Convert/LLVMBuild.txt =================================================================== --- lib/Bitcode/Convert/LLVMBuild.txt +++ lib/Bitcode/Convert/LLVMBuild.txt @@ -1,4 +1,4 @@ -;===- ./lib/Bitcode/LLVMBuild.txt ------------------------------*- Conf -*--===; +;===- ./lib/Bitcode/Convert/LLVMBuild.txt ----------------------*- Conf -*--===; ; ; The LLVM Compiler Infrastructure ; @@ -15,10 +15,8 @@ ; ;===------------------------------------------------------------------------===; -[common] -subdirectories = Reader Writer - [component_0] -type = Group -name = Bitcode -parent = Libraries +type = Library +name = BitcodeConv +parent = Bitcode +required_libraries = Core Support Index: lib/Bitcode/Convert/Makefile =================================================================== --- lib/Bitcode/Convert/Makefile +++ lib/Bitcode/Convert/Makefile @@ -1,4 +1,4 @@ -##===- lib/Bitcode/Makefile --------------------------------*- Makefile -*-===## +##===- lib/Bitcode/Convert/Makefile ------------------------*- Makefile -*-===## # # The LLVM Compiler Infrastructure # @@ -7,8 +7,8 @@ # ##===----------------------------------------------------------------------===## -LEVEL = ../.. -PARALLEL_DIRS = Reader Writer +LEVEL = ../../.. 
+LIBRARYNAME = LLVMBitcodeConv
+BUILD_ARCHIVE = 1
 
 include $(LEVEL)/Makefile.common
-
Index: lib/Bitcode/Convert/SimplifiedBitcodeReader.cpp
===================================================================
--- /dev/null
+++ lib/Bitcode/Convert/SimplifiedBitcodeReader.cpp
@@ -0,0 +1,165 @@
+//===- SimplifiedBitcodeReader.cpp - Reads simplified bitcode -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Implements a reader that takes the simplified form of bitcode records, and
+// generates the corresponding sequence of bitcode records.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Bitcode/BitcodeConvert.h"
+
+#include "llvm/ADT/STLExtras.h"
+
+#include <limits>
+
+using namespace llvm;
+using namespace llvm::bcconv;
+
+namespace {
+
+// Defines reader error codes.
+enum ReaderErrorType { NoRecordCode = 1, MalformedInput };
+
+// Defines how the reader treats each possible input byte.
+enum SimplifiedActionKind {
+  AcceptAsIsKind,   // Data byte: becomes part of the current value.
+  AcceptValueKind,  // Ends the current value.
+  AcceptRecordKind, // Ends the current record.
+  AcceptEscapeKind  // The next byte is data, whatever its value.
+};
+
+// Defines the corresponding error messages.
+class ReaderErrorCategoryType : public std::error_category {
+  ReaderErrorCategoryType(const ReaderErrorCategoryType &) = delete;
+  void operator=(const ReaderErrorCategoryType &) = delete;
+
+public:
+  // Returns the single instance of this category.
+  static const ReaderErrorCategoryType &get() { return Sentinel; }
+
+private:
+  static const ReaderErrorCategoryType Sentinel;
+  ReaderErrorCategoryType() {}
+  const char *name() const LLVM_NOEXCEPT override {
+    return "Simplified Bitcode Convert Reader";
+  }
+  // Maps a ReaderErrorType value (passed as an int) to its message.
+  std::string message(int IndexError) const override {
+    switch (static_cast<ReaderErrorType>(IndexError)) {
+    case NoRecordCode:
+      return "No record code for bitcode record";
+    case MalformedInput:
+      return "Malformed input. Doesn't end with accept record byte.";
+    }
+    llvm_unreachable("Unknown error type!");
+  }
+  ~ReaderErrorCategoryType() override = default;
+};
+
+const ReaderErrorCategoryType ReaderErrorCategoryType::Sentinel;
+
+// Number of distinct values a byte can take.
+static const size_t NumByteValues =
+    std::numeric_limits<uint8_t>::max() + (size_t)1;
+
+// Parses the simplified (byte-oriented) form of bitcode records.
+class SimplifiedBitcodeReader {
+public:
+  SimplifiedBitcodeReader(StringRef Input, BitcodeRecordList &Records,
+                          const SimplifiedBitcodeActions &Actions)
+      : Input(Input), Records(Records), Actions(Actions) {
+    init();
+  }
+
+  // Reads all of Input, adding each completed record to Records. A value or
+  // record left unterminated at the end of the input is accepted as if the
+  // terminator were present. Returns an error for a record with no values, or
+  // for an escape byte that is the last byte of the input.
+  std::error_code read() {
+    BitcodeRecord Values;
+    // Unsigned so that shifting in more than eight bytes just discards the
+    // high bits instead of overflowing a signed integer.
+    uint64_t Number = 0;
+    bool FoundNumber = false;
+    while (Cursor < Input.size()) {
+      uint8_t Byte = Input[Cursor++];
+      switch (ByteAction[Byte]) {
+      case AcceptEscapeKind:
+        if (Cursor == Input.size())
+          return std::error_code(MalformedInput,
+                                 ReaderErrorCategoryType::get());
+        Byte = Input[Cursor++];
+      // Intentionally drop to the next case.
+      case AcceptAsIsKind:
+        // Bytes arrive most significant first.
+        Number = (Number << 8) + Byte;
+        FoundNumber = true;
+        break;
+      case AcceptValueKind:
+        // With no data bytes pending this yields the value 0.
+        Values.push_back(signRotatedValue(Number));
+        Number = 0;
+        FoundNumber = false;
+        break;
+      case AcceptRecordKind:
+        if (Values.empty())
+          return std::error_code(NoRecordCode, ReaderErrorCategoryType::get());
+        Records.push_back(make_unique<BitcodeRecord>(Values));
+        Values.clear();
+        // NOTE(review): data bytes not yet closed by an accept-value byte are
+        // discarded here rather than added to the record -- confirm intended.
+        Number = 0;
+        FoundNumber = false;
+        break;
+      }
+    }
+    // Add value/record terminators if applicable.
+    if (FoundNumber)
+      Values.push_back(signRotatedValue(Number));
+
+    if (!Values.empty())
+      Records.push_back(make_unique<BitcodeRecord>(Values));
+    return std::error_code();
+  }
+
+private:
+  // The input to parse. Held by value: a StringRef is only a pointer/length
+  // pair.
+  StringRef Input;
+  // The list of bitcode records to generate.
+  BitcodeRecordList &Records;
+  // The byte values that act as value/record terminators and as the escape.
+  const SimplifiedBitcodeActions &Actions;
+  // The current location within the input.
+  size_t Cursor = 0;
+  // What to do for each possible byte value.
+  SimplifiedActionKind ByteAction[NumByteValues];
+
+  // Fills in ByteAction. If two actions share a byte value, the one assigned
+  // last (value, then record, then escape) wins.
+  void init() {
+    for (size_t i = 0; i < NumByteValues; ++i)
+      ByteAction[i] = AcceptAsIsKind;
+    ByteAction[Actions.AcceptValue] = AcceptValueKind;
+    ByteAction[Actions.AcceptRecord] = AcceptRecordKind;
+    ByteAction[Actions.AcceptEscape] = AcceptEscapeKind;
+  }
+
+  // Decodes a signed value stored with the sign bit in the LSB for dense
+  // encoding: the magnitude is V >> 1 and the value is negative when bit 0 is
+  // set.
+  uint64_t signRotatedValue(uint64_t V) {
+    if ((V & 1) == 0)
+      return V >> 1;
+    if (V != 1)
+      return -(V >> 1);
+    // There is no such thing as -0 with integers. "-0" really means MININT.
+    return 1ULL << 63;
+  }
+};
+
+} // end of anonymous namespace
+
+namespace llvm {
+namespace bcconv {
+
+const uint8_t SimplifiedBitcodeActions::AcceptValueDefault = 0xAA;
+const uint8_t SimplifiedBitcodeActions::AcceptRecordDefault = 0xBA;
+const uint8_t SimplifiedBitcodeActions::AcceptEscapeDefault = 0xBB;
+
+const SimplifiedBitcodeActions SimplifiedBitcodeActions::Default;
+
+SimplifiedBitcodeActions::SimplifiedBitcodeActions() {}
+
+std::error_code readSimplifiedBitcode(StringRef Input,
+                                      BitcodeRecordList &Records,
+                                      const SimplifiedBitcodeActions &Actions) {
+  SimplifiedBitcodeReader Reader(Input, Records, Actions);
+  return Reader.read();
+}
+
+} // end of namespace llvm::bcconv
+} // end of namespace llvm
Index: lib/Bitcode/Convert/SimplifiedBitcodeWriter.cpp
===================================================================
--- /dev/null
+++ lib/Bitcode/Convert/SimplifiedBitcodeWriter.cpp
@@ -0,0 +1,121 @@
+//===- SimplifiedBitcodeWriter.cpp - Writes simp bitcode from records -----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Implements the simplified bitcode writer when given bitcode records.
+//
+//===----------------------------------------------------------------------===//
+
+#include "BitcodeWriter.h"
+#include <climits>
+#include <limits>
+
+using namespace llvm;
+using namespace llvm::bcconv;
+
+namespace {
+
+// Writes bitcode records in the simplified (byte-oriented) form.
+class SimplifiedBitcodeWriter : public BitcodeWriter {
+  SimplifiedBitcodeWriter(const SimplifiedBitcodeWriter &) = delete;
+  void operator=(const SimplifiedBitcodeWriter &) = delete;
+
+public:
+  SimplifiedBitcodeWriter(OutputBuffer &Buffer,
+                          const SimplifiedBitcodeActions &Actions,
+                          std::string &ErrorMessages, bool ErrorRecover)
+      : BitcodeWriter(ErrorMessages, ErrorRecover), Buffer(Buffer),
+        Actions(Actions) {
+    init();
+  }
+
+  ~SimplifiedBitcodeWriter() final = default;
+
+private:
+  // Buffer to write simplified bitcode records into.
+  OutputBuffer &Buffer;
+  // The byte values used as value/record terminators and as the escape.
+  const SimplifiedBitcodeActions &Actions;
+  // Defines what byte values need to be prefixed with an escape action.
+  static constexpr size_t NumByteValues = 1 << CHAR_BIT;
+  bool NeedsEscape[NumByteValues];
+
+  // Marks the three action bytes as needing an escape when they occur as data.
+  void init() {
+    static_assert(std::numeric_limits<uint8_t>::max() < (1 << CHAR_BIT),
+                  "NeedsEscape must have an entry for every uint8_t value");
+    for (size_t i = 0; i < NumByteValues; ++i)
+      NeedsEscape[i] = false;
+    NeedsEscape[Actions.AcceptValue] = true;
+    NeedsEscape[Actions.AcceptRecord] = true;
+    NeedsEscape[Actions.AcceptEscape] = true;
+  }
+
+  void writeValue(uint64_t Value);
+
+  void writeTerminator() { Buffer.push_back(Actions.AcceptRecord); }
+
+  void writeEnterBlock(uint64_t ID) final {
+    BitcodeWriter::writeEnterBlock(ID);
+    writeValue(BLK_CODE_ENTER);
+    writeValue(ID);
+    writeTerminator();
+  }
+
+  void writeExitBlock() final {
+    BitcodeWriter::writeExitBlock();
+    writeValue(BLK_CODE_EXIT);
+    writeTerminator();
+  }
+
+  void writeRecord(const BitcodeRecord &Record) final {
+    for (const auto Value : Record)
+      writeValue(Value);
+    writeTerminator();
+  }
+
+  // Encodes a signed value with the sign bit moved to the LSB for dense
+  // encoding: the absolute value is shifted left by one and bit 0 is set for
+  // negative values. This must stay the exact inverse of the simplified
+  // reader's decoding (odd V decodes to -(V >> 1), and 1 decodes to MININT).
+  uint64_t rotateSign(uint64_t U) {
+    int64_t I = U;
+    if (I >= 0)
+      return U << 1;
+    // 0 - U is the magnitude of the negative value. For MININT the shift
+    // discards the only set bit, leaving 1, which is the "-0" encoding.
+    return ((0 - U) << 1) | 1;
+  }
+};
+
+static const int LeftmostShift = 7 * CHAR_BIT;
+static const uint64_t LeftmostByteMask = ((uint64_t)0xFF) << LeftmostShift;
+
+// Writes the sign-rotated Value most significant byte first, omitting leading
+// zero bytes and escaping bytes that collide with an action byte, and then
+// writes the accept-value byte. A value of zero is written as the accept-value
+// byte alone.
+void SimplifiedBitcodeWriter::writeValue(uint64_t Value) {
+  Value = rotateSign(Value);
+  bool WroteByte = false;
+  int Shift = LeftmostShift;
+  uint64_t Mask = LeftmostByteMask;
+  while (Mask) {
+    uint8_t Byte = (Value & Mask) >> Shift;
+    if (Byte || WroteByte) {
+      if (NeedsEscape[Byte])
+        Buffer.push_back(Actions.AcceptEscape);
+      Buffer.push_back(Byte);
+      WroteByte = true;
+    }
+    Mask >>= CHAR_BIT;
+    Shift -= CHAR_BIT;
+  }
+  Buffer.push_back(Actions.AcceptValue);
+}
+
+} // end of anonymous namespace
+
+namespace llvm {
+namespace bcconv {
+
+bool writeSimplifiedBitcode(OutputBuffer &Output,
+                            const BitcodeRecordList &Records,
+                            const SimplifiedBitcodeActions &Actions,
+                            std::string &ErrorMessages, bool ErrorRecover) {
+  SimplifiedBitcodeWriter Writer(Output, Actions, ErrorMessages, ErrorRecover);
+  return Writer.writeBitcodeRecords(Records);
+}
+
+} // end of namespace llvm::bcconv
+} // end of namespace llvm
Index: lib/Bitcode/Convert/TextualBitcodeReader.cpp
===================================================================
--- /dev/null
+++ lib/Bitcode/Convert/TextualBitcodeReader.cpp
@@ -0,0 +1,242 @@
+//===- TextualBitcodeReader.cpp - Reads textual bitcode -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Implements the textual bitcode reader that builds the corresponding list of
+// bitcode records.
+//
+// ===---------------------------------------------------------------------===//
+
+#include "llvm/Bitcode/BitcodeConvert.h"
+
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Bitcode/BitstreamReader.h"
+#include "llvm/Bitcode/ReaderWriter.h"
+
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+using namespace llvm::bcconv;
+
+namespace {
+
+// Defines reader error codes. Values start at 1 because a std::error_code
+// with value 0 means "no error".
+enum ReaderErrorType {
+  NoCodeForRecord = 1,
+  NoValueAfterSeparator,
+  NoSeparatorOrTerminator,
+  NoNewlineAfterTerminator
+};
+
+// Defines the corresponding error messages. There is exactly one instance
+// (Sentinel), reachable through get(); the class is neither copyable nor
+// constructible from outside.
+class ReaderErrorCategoryType : public std::error_category {
+  ReaderErrorCategoryType(const ReaderErrorCategoryType &) = delete;
+  void operator=(const ReaderErrorCategoryType &) = delete;
+
+public:
+  // Returns the singleton category instance.
+  static const ReaderErrorCategoryType &get() { return Sentinel; }
+
+private:
+  static const ReaderErrorCategoryType Sentinel;
+  ReaderErrorCategoryType() {}
+  const char *name() const LLVM_NOEXCEPT override {
+    return "Textual Bitcode Convert Reader";
+  }
+  // Maps a ReaderErrorType value (passed as int) to its message text.
+  std::string message(int IndexError) const override {
+    switch (static_cast<ReaderErrorType>(IndexError)) {
+    case NoCodeForRecord:
+      return "Bitcode record doesn't begin with a record code";
+    case NoValueAfterSeparator:
+      return "Value expected after separator, but not found";
+    case NoSeparatorOrTerminator:
+      return "Separator/terminator expected after value";
+    case NoNewlineAfterTerminator:
+      return "Newline expected after terminating semicolon";
+    }
+    llvm_unreachable("Unknown error type!");
+  }
+  ~ReaderErrorCategoryType() override = default;
+};
+
+const ReaderErrorCategoryType ReaderErrorCategoryType::Sentinel;
+
+// Wraps a reader error code in a std::error_code of the reader's category.
+std::error_code error(ReaderErrorType Error) {
+  return std::error_code(Error, ReaderErrorCategoryType::get());
+}
+
+/// Parses text bitcode records, putting them in Records.
+class TextRecordParser {
+  TextRecordParser(const TextRecordParser &) = delete;
+  void operator=(const TextRecordParser &) = delete;
+
+public:
+  /// Creates a parser to parse records from the Input, and put them into
+  /// Records. If AllowComments is true, relax the input rules to allow comments
+  /// as well.
+  TextRecordParser(StringRef &Input, BitcodeRecordList &Records,
+                   bool AllowComments)
+      : Input(Input), Records(Records), AllowComments(AllowComments) {}
+
+  /// Reads in the list of bitcode records in the input buffer. Returns the
+  /// first error hit, or a default (success) error_code once the whole input
+  /// has been consumed.
+  std::error_code read();
+
+private:
+  // The input to parse. Held by reference, so the referenced StringRef must
+  // outlive the parser.
+  StringRef &Input;
+  // The list of bitcode records to generate.
+  BitcodeRecordList &Records;
+  // Allow comments in bitcode file.
+  bool AllowComments;
+  // The current location within the input.
+  size_t Cursor = 0;
+  // The separator character.
+  static const char *Separator;
+  // The terminator character.
+  static const char *Terminator;
+  // The newline character that must follow a terminator.
+  static const char *Newline;
+  // Valid digits that can be used to define numbers.
+  static const char *Digits;
+
+  // Returns true if we have reached the end of the input.
+  bool atEof() const { return Cursor == Input.size(); }
+
+  // Tries to read a character in the given set of Characters. Returns the
+  // character found, or 0 if not found. If a character is not found, it will
+  // not advance the cursor.
+  char readChar(const char *Characters);
+
+  // Tries to read a (integral) number. If successful, Value is set to
+  // the parsed number and returns true. Otherwise false is returned.
+  // Does not check for number overflow.
+  bool readNumber(uint64_t &Value);
+
+  // Skip empty lines, and lines beginning with a semicolon. Returns true if
+  // a line was skipped.
+  bool skipWhitespaceLine();
+
+  // Skip rest of line (through the newline, or to the end of the input).
+  void skipComment() {
+    while (!atEof()) {
+      char Ch = Input[Cursor++];
+      if (Ch == *Newline)
+        return;
+    }
+  }
+
+  // Reads a record from the input.
+  std::error_code readRecord();
+};
+
+const char *TextRecordParser::Newline = "\n";
+const char *TextRecordParser::Separator = ",";
+const char *TextRecordParser::Terminator = ";";
+const char *TextRecordParser::Digits = "0123456789";
+
+std::error_code TextRecordParser::read() {
+  while (!atEof()) {
+    // Blank lines and comment lines are only tolerated in relaxed mode.
+    if (AllowComments)
+      while (skipWhitespaceLine())
+        ;
+    if (std::error_code EC = readRecord())
+      return EC;
+  }
+  return std::error_code();
+}
+
+bool TextRecordParser::skipWhitespaceLine() {
+  if (readChar(Newline))
+    return true;
+  // A line starting with the terminator (semicolon) is a comment line.
+  if (readChar(Terminator)) {
+    skipComment();
+    return true;
+  }
+  return false;
+}
+
+char TextRecordParser::readChar(const char *Characters) {
+  if (atEof())
+    return 0;
+  char Ch = Input[Cursor];
+  // strchr() treats the terminating NUL of Characters as part of the string,
+  // so a NUL byte in the input would "match" every set: the cursor would
+  // advance while 0 ("not found") is returned, silently dropping the byte.
+  // Reject NUL explicitly so the documented contract holds.
+  if (Ch == '\0' || std::strchr(Characters, Ch) == nullptr)
+    return 0;
+  ++Cursor;
+  return Ch;
+}
+
+bool TextRecordParser::readNumber(uint64_t &Value) {
+  Value = 0;
+  bool NumberFound = false;
+  // Accumulate decimal digits until the first non-digit (or end of input).
+  while (1) {
+    char Ch = readChar(Digits);
+    if (!Ch)
+      return NumberFound;
+    Value = (Value * 10) + (Ch - '0');
+    NumberFound = true;
+  }
+}
+
+std::error_code TextRecordParser::readRecord() {
+  // States of parser used to parse bitcode records.
+  enum ParseState {
+    // Begin parsing a record.
+    StartParse,
+    // Before a value in the record.
+    BeforeValue,
+    // Immediately after a value in the record.
+    AfterValue
+  } State = StartParse;
+  std::vector<uint64_t> Values;
+  uint64_t Number = 0;
+  while (1) {
+    switch (State) {
+    case StartParse:
+      if (!readNumber(Number)) {
+        // Running out of input where a record would start is not an error.
+        if (atEof())
+          return std::error_code();
+        return error(NoCodeForRecord);
+      }
+      Values.push_back(Number);
+      State = AfterValue;
+      continue;
+    case BeforeValue:
+      if (!readNumber(Number))
+        return error(NoValueAfterSeparator);
+      Values.push_back(Number);
+      State = AfterValue;
+      continue;
+    case AfterValue:
+      if (readChar(Separator)) {
+        State = BeforeValue;
+        continue;
+      }
+      if (readChar(Terminator)) {
+        // In relaxed mode anything after the terminator is a comment; in
+        // strict mode the terminator must be followed directly by a newline.
+        if (AllowComments)
+          skipComment();
+        else if (!readChar(Newline))
+          return error(NoNewlineAfterTerminator);
+        // NOTE(review): the template argument was lost in extraction;
+        // BitcodeRecord is assumed to be the element type of
+        // BitcodeRecordList -- confirm against BitcodeConvert.h.
+        Records.push_back(make_unique<BitcodeRecord>(Values));
+        return std::error_code();
+      }
+      return error(NoSeparatorOrTerminator);
+    }
+  }
+}
+
+} // end of anonymous namespace
+
+namespace llvm {
+namespace bcconv {
+
+// Public entry point: parses the textual bitcode in Input and appends the
+// records found to Records.
+std::error_code readTextualBitcode(StringRef Input, BitcodeRecordList &Records,
+                                   bool AllowComments) {
+  TextRecordParser Reader(Input, Records, AllowComments);
+  return Reader.read();
+}
+
+} // end of namespace llvm::bcconv
+} // end of namespace llvm
Index: lib/Bitcode/Convert/TextualBitcodeWriter.cpp
===================================================================
--- /dev/null
+++ lib/Bitcode/Convert/TextualBitcodeWriter.cpp
@@ -0,0 +1,99 @@
+//===- TextualBitcodeWriter.cpp - Writes textual bitcode from records -----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Implements the textual bitcode writer when given bitcode records.
+//
+//===----------------------------------------------------------------------===//
+
+#include "BitcodeWriter.h"
+
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+using namespace llvm::bcconv;
+
+namespace {
+
+// Writer of textual bitcode records to a byte buffer. Each record becomes one
+// line: decimal values separated by ',' and terminated by ";\n".
+class TextRecordWriter : public BitcodeWriter {
+  TextRecordWriter(const TextRecordWriter &) = delete;
+  void operator=(const TextRecordWriter &) = delete;
+
+public:
+  // Binds the writer to the output Buffer; ValueStream formats into the
+  // member string ValueBuffer.
+  TextRecordWriter(OutputBuffer &Buffer, std::string &ErrorMessages,
+                   bool ErrorRecover)
+      : BitcodeWriter(ErrorMessages, ErrorRecover), Buffer(Buffer),
+        ValueStream(ValueBuffer) {}
+
+  ~TextRecordWriter() final = default;
+
+private:
+  // Buffer to write textual bitcode records into.
+  OutputBuffer &Buffer;
+  // String stream to convert integers to strings.
+  std::string ValueBuffer;
+  raw_string_ostream ValueStream;
+
+  // Appends the decimal text of Value to Buffer; defined below.
+  void writeValue(uint64_t Value);
+
+  // Separates two values of the same record.
+  void writeSeparator() { Buffer.push_back(','); }
+
+  // Ends a record: terminating semicolon followed by a newline.
+  void writeTerminator() {
+    Buffer.push_back(';');
+    Buffer.push_back('\n');
+  }
+
+  // Emits the record "BLK_CODE_ENTER,ID;" after running the base-class hook.
+  void writeEnterBlock(uint64_t ID) final {
+    BitcodeWriter::writeEnterBlock(ID);
+    writeValue(BLK_CODE_ENTER);
+    writeSeparator();
+    writeValue(ID);
+    writeTerminator();
+  }
+
+  // Emits the record "BLK_CODE_EXIT;" after running the base-class hook.
+  void writeExitBlock() final {
+    BitcodeWriter::writeExitBlock();
+    writeValue(BLK_CODE_EXIT);
+    writeTerminator();
+  }
+
+  // Emits all values of Record, comma separated, then the terminator. An
+  // empty record produces just the terminator.
+  void writeRecord(const BitcodeRecord &Record) final {
+    bool IsFirst = true;
+    for (const auto Value : Record) {
+      // No separator in front of the first value.
+      if (IsFirst)
+        IsFirst = false;
+      else
+        writeSeparator();
+      writeValue(Value);
+    }
+    writeTerminator();
+  }
+};
+
+// Formats Value through ValueStream into ValueBuffer, copies the characters
+// to Buffer, and clears ValueBuffer so it can be reused for the next value.
+void TextRecordWriter::writeValue(uint64_t Value) {
+  ValueStream << Value;
+  // Make sure the formatted text has reached ValueBuffer before reading it.
+  ValueStream.flush();
+  for (auto ch : ValueBuffer)
+    Buffer.push_back(ch);
+  ValueBuffer.clear();
+}
+
+} // end of anonymous namespace
+
+namespace llvm {
+namespace bcconv {
+
+// Public entry point: writes Records to Buffer in textual form and returns
+// the result of writeBitcodeRecords().
+bool writeTextualBitcode(OutputBuffer &Buffer, const BitcodeRecordList &Records,
+                         std::string &ErrorMessages, bool ErrorRecover) {
+  TextRecordWriter RecordWriter(Buffer, ErrorMessages, ErrorRecover);
+  return RecordWriter.writeBitcodeRecords(Records);
+}
+
+} // end of namespace llvm::bcconv
+} // end of namespace llvm
Index: lib/Bitcode/LLVMBuild.txt =================================================================== --- lib/Bitcode/LLVMBuild.txt +++ lib/Bitcode/LLVMBuild.txt @@ -16,7 +16,7 @@ ;===------------------------------------------------------------------------===; [common] -subdirectories = Reader Writer +subdirectories = Convert Reader Writer [component_0] type = Group Index: lib/Bitcode/Makefile =================================================================== --- lib/Bitcode/Makefile +++ lib/Bitcode/Makefile @@ -8,7 +8,7 @@ ##===----------------------------------------------------------------------===## LEVEL = ../.. -PARALLEL_DIRS = Reader Writer +PARALLEL_DIRS = Convert Reader Writer include $(LEVEL)/Makefile.common Index: test/Bitcode/text-bitcode.ll =================================================================== --- /dev/null +++ test/Bitcode/text-bitcode.ll @@ -0,0 +1,42 @@ +; Tests that we can convert LLVM bitcode to textual form, and back to binary. + +; RUN: llvm-as %s -o - | llvm-bcconv -from-binary -to-text \ +; RUN: | FileCheck --check-prefix=TBC %s + +; RUN: llvm-as %s -o - | llvm-bcconv -from-binary -to-text \ +; RUN: | llvm-bcconv -from-text -to-binary \ +; RUN: | llvm-dis | FileCheck %s + +; RUN: llvm-as %s -o - | llvm-bcconv -from-binary -to-simplified \ +; RUN: | llvm-bcconv -from-simplified -to-binary \ +; RUN: | llvm-dis | FileCheck %s + +define void @f() { + ret void +} + +; CHECK: define void @f() { +; CHECK-NEXT: ret void +; CHECK-NEXT: } + +; TBC: 65535,8; +; TBC-NEXT: 1,1; +; TBC-NEXT: 65535,17; +; TBC-NEXT: 1,4; +; TBC-NEXT: 2; +; TBC-NEXT: 21,0,0; +; TBC-NEXT: 8,1,0; +; TBC-NEXT: 16; +; TBC-NEXT: 65534; +; TBC-NEXT: 8,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0; +; TBC-NEXT: 65535,15; +; Intentionally skip contents of metadata block. 
+; TBC: 65534; +; TBC-NEXT: 65535,14; +; TBC-NEXT: 1,0,102; +; TBC-NEXT: 65534; +; TBC-NEXT: 65535,12; +; TBC-NEXT: 1,1; +; TBC-NEXT: 10; +; TBC-NEXT: 65534; +; TBC-NEXT: 65534;
Index: test/Convert/MyMain.cpp
===================================================================
--- /dev/null
+++ test/Convert/MyMain.cpp
@@ -0,0 +1,88 @@
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Bitcode/BitcodeConvert.h"
+#include "llvm/Bitcode/ReaderWriter.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+
+// NOTE(review): the header name below was stripped in extraction; the code
+// uses jmp_buf/setjmp/longjmp, so presumably the C setjmp header -- confirm.
+extern "C" {
+#include
+}
+
+using namespace llvm;
+using namespace llvm::bcconv;
+
+// Jump target used to get back into LLVMFuzzerTestOneInput from the fatal
+// error handler.
+static jmp_buf JmpBuf;
+
+namespace {
+
+// Diagnostic handler handed to the bitcode reader; deliberately silent.
+void MyBitcodeDiagnosticHandler(const DiagnosticInfo&Info) {
+  // Do nothing, so that parser returns error code.
+}
+
+// Fatal error handler installed by the fuzz entry point. It never returns
+// normally: it jumps back to the setjmp() in LLVMFuzzerTestOneInput.
+// NOTE(review): longjmp does not run destructors of the frames it skips --
+// confirm that this is acceptable for this harness.
+void MyFatalErrorHandler(void *user_data, const std::string& reason,
+                         bool gen_crash_diag) {
+  // Don't bother printing reason, just return to the test function,
+  // since a fatal error represents a successful parse (i.e. it correctly
+  // terminated with an error message to the user).
+  longjmp(JmpBuf, 1);
+}
+
+// True once MyFatalErrorHandler has been installed (done on first call).
+static bool InstalledHandler = false;
+
+} // end of anonymous namespace
+
+// Running statistics over all fuzz inputs seen so far: inputs tried, inputs
+// that got past the simplified-form reader, inputs that could then be written
+// as binary bitcode, and inputs whose binary form the bitcode reader loaded
+// without reporting an error.
+static size_t TestCount = 0;
+static size_t TextReadable = 0;
+static size_t BinaryWritable = 0;
+static size_t Interesting = 0;
+
+// Fuzz entry point. Treats Data as simplified bitcode, converts it to bitcode
+// records, writes those as binary bitcode, and feeds the result to the lazy
+// bitcode module reader.
+extern "C" void LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
+  // Progress report every million inputs (including the very first call).
+  if (TestCount % 1000000 == 0)
+    errs() << "Ran " << TestCount << " tests, "
+           << TextReadable << " text readable, "
+           << BinaryWritable << " binary writable, "
+           << Interesting << " interesting!\n";
+
+  ++TestCount;
+
+  // Allocate space for locals before setjmp so that memory can be collected
+  // if parse exits prematurely.
+  StringRef Input((const char *)Data, Size);
+  BitcodeRecordList Records;
+  SimplifiedBitcodeActions Actions;
+  SmallVector Binary;
+  std::string ErrorMessages;
+
+  if (setjmp(JmpBuf))
+    // If reached, we have returned with non-zero status, so exit.
+    return;
+
+  if (!InstalledHandler) {
+    llvm::install_fatal_error_handler(::MyFatalErrorHandler, nullptr);
+    InstalledHandler = true;
+  }
+
+  // convert Data to bitcode records.
+  // A non-zero (truthy) result is treated as a read failure here.
+  if (readSimplifiedBitcode(Input, Records, Actions))
+    return;
+
+  ++TextReadable;
+
+  // Convert bitcode records to binary bitcode representation.
+  // The last argument (true) is the ErrorRecover flag.
+  if (!writeBinaryBitcode(Binary, Records, ErrorMessages, true))
+    return;
+
+  ++BinaryWritable;
+
+  // read generated binary bitcode to generate module.
+  StringRef RealInput(Binary.data(), Binary.size());
+  ErrorOr> Result =
+      getLazyBitcodeModule(MemoryBuffer::getMemBuffer(RealInput, "", false),
+                           getGlobalContext(),
+                           (DiagnosticHandlerFunction)::MyBitcodeDiagnosticHandler);
+
+  // Only inputs that load without an error are counted as interesting.
+  if (!Result.getError())
+    ++Interesting;
+}
Index: test/Convert/crash1.err =================================================================== --- /dev/null +++ test/Convert/crash1.err @@ -0,0 +1,31 @@ +$ llvm-dis -disable-output crash1.bc +terminate called after throwing an instance of 'std::length_error' + what(): vector::_M_default_append +#0 0x5eeeac llvm::sys::PrintStackTrace(llvm::raw_ostream&) (/workspace/llvm-dev/build-bcconv/bin/llvm-dis+0x5eeeac) +#1 0x5ef1c1 PrintStackTraceSignalHandler(void*) (/workspace/llvm-dev/build-bcconv/bin/llvm-dis+0x5ef1c1) +#2 0x5ed93d llvm::sys::RunSignalHandlers() (/workspace/llvm-dev/build-bcconv/bin/llvm-dis+0x5ed93d) +#3 0x5ede14 SignalHandler(int) (/workspace/llvm-dev/build-bcconv/bin/llvm-dis+0x5ede14) +#4 0x7f97dc373340 __restore_rt (/lib/x86_64-linux-gnu/libpthread.so.0+0x10340) +#5 0x7f97db7b4cc9 gsignal /build/buildd/eglibc-2.19/signal/../nptl/sysdeps/unix/sysv/linux/raise.c:56:0 +#6 0x7f97db7b80d8 abort /build/buildd/eglibc-2.19/stdlib/abort.c:91:0 +#7 0x7f97dc0bf535 
__gnu_cxx::__verbose_terminate_handler() (/usr/lib/x86_64-linux-gnu/libstdc++.so.6+0x60535) +#8 0x7f97dc0bd6d6 (/usr/lib/x86_64-linux-gnu/libstdc++.so.6+0x5e6d6) +#9 0x7f97dc0bd703 (/usr/lib/x86_64-linux-gnu/libstdc++.so.6+0x5e703) +#10 0x7f97dc0bd922 (/usr/lib/x86_64-linux-gnu/libstdc++.so.6+0x5e922) +#11 0x7f97dc10f387 std::__throw_length_error(char const*) (/usr/lib/x86_64-linux-gnu/libstdc++.so.6+0xb0387) +#12 0x441322 std::vector >::_M_check_len(unsigned long, char const*) const (/workspace/llvm-dev/build-bcconv/bin/llvm-dis+0x441322) +#13 0x439466 std::vector >::_M_default_append(unsigned long) (/workspace/llvm-dev/build-bcconv/bin/llvm-dis+0x439466) +#14 0x430745 std::vector >::resize(unsigned long) (/workspace/llvm-dev/build-bcconv/bin/llvm-dis+0x430745) +#15 0x41474a (anonymous namespace)::BitcodeReader::parseTypeTableBody() (/workspace/llvm-dev/build-bcconv/bin/llvm-dis+0x41474a) +#16 0x414445 (anonymous namespace)::BitcodeReader::parseTypeTable() (/workspace/llvm-dev/build-bcconv/bin/llvm-dis+0x414445) +#17 0x420138 (anonymous namespace)::BitcodeReader::parseModule(bool, bool) (/workspace/llvm-dev/build-bcconv/bin/llvm-dis+0x420138) +#18 0x4225af (anonymous namespace)::BitcodeReader::parseBitcodeInto(std::unique_ptr >, llvm::Module*, bool) (/workspace/llvm-dev/build-bcconv/bin/llvm-dis+0x4225af) +#19 0x42c3dc getBitcodeModuleImpl(std::unique_ptr >, llvm::StringRef, (anonymous namespace)::BitcodeReader*, llvm::LLVMContext&, bool, bool) (/workspace/llvm-dev/build-bcconv/bin/llvm-dis+0x42c3dc) +#20 0x42c667 llvm::getStreamedBitcodeModule(llvm::StringRef, std::unique_ptr >, llvm::LLVMContext&, std::function) (/workspace/llvm-dev/build-bcconv/bin/llvm-dis+0x42c667) +#21 0x406132 main (/workspace/llvm-dev/build-bcconv/bin/llvm-dis+0x406132) +#22 0x7f97db79fec5 __libc_start_main /build/buildd/eglibc-2.19/csu/libc-start.c:321:0 +#23 0x4045f9 _start (/workspace/llvm-dev/build-bcconv/bin/llvm-dis+0x4045f9) +Stack dump: +0. 
Program arguments: llvm-dis -disable-output crash1.bc +Aborted + Index: test/Convert/crash4.err =================================================================== --- /dev/null +++ test/Convert/crash4.err @@ -0,0 +1,24 @@ +$ llvm-dis -disable-output crash4.bc +llvm-dis: /workspace/llvm-dev/llvm/lib/IR/Type.cpp:337: llvm::FunctionType::FunctionType(llvm::Type*, llvm::ArrayRef, bool): Assertion `isValidReturnType(Result) && "invalid return type for function"' failed. +#0 0x5eeeac llvm::sys::PrintStackTrace(llvm::raw_ostream&) (/workspace/llvm-dev/build-bcconv/bin/llvm-dis+0x5eeeac) +#1 0x5ef1c1 PrintStackTraceSignalHandler(void*) (/workspace/llvm-dev/build-bcconv/bin/llvm-dis+0x5ef1c1) +#2 0x5ed93d llvm::sys::RunSignalHandlers() (/workspace/llvm-dev/build-bcconv/bin/llvm-dis+0x5ed93d) +#3 0x5ede14 SignalHandler(int) (/workspace/llvm-dev/build-bcconv/bin/llvm-dis+0x5ede14) +#4 0x7f3114582340 __restore_rt (/lib/x86_64-linux-gnu/libpthread.so.0+0x10340) +#5 0x7f31139c3cc9 gsignal /build/buildd/eglibc-2.19/signal/../nptl/sysdeps/unix/sysv/linux/raise.c:56:0 +#6 0x7f31139c70d8 abort /build/buildd/eglibc-2.19/stdlib/abort.c:91:0 +#7 0x7f31139bcb86 __assert_fail_base /build/buildd/eglibc-2.19/assert/assert.c:92:0 +#8 0x7f31139bcc32 (/lib/x86_64-linux-gnu/libc.so.6+0x2fc32) +#9 0x58b222 llvm::FunctionType::FunctionType(llvm::Type*, llvm::ArrayRef, bool) (/workspace/llvm-dev/build-bcconv/bin/llvm-dis+0x58b222) +#10 0x58b421 llvm::FunctionType::get(llvm::Type*, llvm::ArrayRef, bool) (/workspace/llvm-dev/build-bcconv/bin/llvm-dis+0x58b421) +#11 0x414f5f (anonymous namespace)::BitcodeReader::parseTypeTableBody() (/workspace/llvm-dev/build-bcconv/bin/llvm-dis+0x414f5f) +#12 0x414445 (anonymous namespace)::BitcodeReader::parseTypeTable() (/workspace/llvm-dev/build-bcconv/bin/llvm-dis+0x414445) +#13 0x420138 (anonymous namespace)::BitcodeReader::parseModule(bool, bool) (/workspace/llvm-dev/build-bcconv/bin/llvm-dis+0x420138) +#14 0x4225af (anonymous 
namespace)::BitcodeReader::parseBitcodeInto(std::unique_ptr >, llvm::Module*, bool) (/workspace/llvm-dev/build-bcconv/bin/llvm-dis+0x4225af) +#15 0x42c3dc getBitcodeModuleImpl(std::unique_ptr >, llvm::StringRef, (anonymous namespace)::BitcodeReader*, llvm::LLVMContext&, bool, bool) (/workspace/llvm-dev/build-bcconv/bin/llvm-dis+0x42c3dc) +#16 0x42c667 llvm::getStreamedBitcodeModule(llvm::StringRef, std::unique_ptr >, llvm::LLVMContext&, std::function) (/workspace/llvm-dev/build-bcconv/bin/llvm-dis+0x42c667) +#17 0x406132 main (/workspace/llvm-dev/build-bcconv/bin/llvm-dis+0x406132) +#18 0x7f31139aeec5 __libc_start_main /build/buildd/eglibc-2.19/csu/libc-start.c:321:0 +#19 0x4045f9 _start (/workspace/llvm-dev/build-bcconv/bin/llvm-dis+0x4045f9) +Stack dump: +0. Program arguments: llvm-dis -disable-output crash4.bc Index: test/Convert/crash5.err =================================================================== --- /dev/null +++ test/Convert/crash5.err @@ -0,0 +1,24 @@ +$ llvm-dis -disable-output crash5.bc +llvm-dis: /workspace/llvm-dev/llvm/lib/Bitcode/Reader/BitcodeReader.cpp:1018: void decodeLLVMAttributesForBitcode(llvm::AttrBuilder&, uint64_t): Assertion `(!Alignment || isPowerOf2_32(Alignment)) && "Alignment must be a power of two."' failed. 
+#0 0x5eeeac llvm::sys::PrintStackTrace(llvm::raw_ostream&) (/workspace/llvm-dev/build-bcconv/bin/llvm-dis+0x5eeeac) +#1 0x5ef1c1 PrintStackTraceSignalHandler(void*) (/workspace/llvm-dev/build-bcconv/bin/llvm-dis+0x5ef1c1) +#2 0x5ed93d llvm::sys::RunSignalHandlers() (/workspace/llvm-dev/build-bcconv/bin/llvm-dis+0x5ed93d) +#3 0x5ede14 SignalHandler(int) (/workspace/llvm-dev/build-bcconv/bin/llvm-dis+0x5ede14) +#4 0x7f272d7f0340 __restore_rt (/lib/x86_64-linux-gnu/libpthread.so.0+0x10340) +#5 0x7f272cc31cc9 gsignal /build/buildd/eglibc-2.19/signal/../nptl/sysdeps/unix/sysv/linux/raise.c:56:0 +#6 0x7f272cc350d8 abort /build/buildd/eglibc-2.19/stdlib/abort.c:91:0 +#7 0x7f272cc2ab86 __assert_fail_base /build/buildd/eglibc-2.19/assert/assert.c:92:0 +#8 0x7f272cc2ac32 (/lib/x86_64-linux-gnu/libc.so.6+0x2fc32) +#9 0x413258 decodeLLVMAttributesForBitcode(llvm::AttrBuilder&, unsigned long) (/workspace/llvm-dev/build-bcconv/bin/llvm-dis+0x413258) +#10 0x41358f (anonymous namespace)::BitcodeReader::parseAttributeBlock() (/workspace/llvm-dev/build-bcconv/bin/llvm-dis+0x41358f) +#11 0x4200a0 (anonymous namespace)::BitcodeReader::parseModule(bool, bool) (/workspace/llvm-dev/build-bcconv/bin/llvm-dis+0x4200a0) +#12 0x4225af (anonymous namespace)::BitcodeReader::parseBitcodeInto(std::unique_ptr >, llvm::Module*, bool) (/workspace/llvm-dev/build-bcconv/bin/llvm-dis+0x4225af) +#13 0x42c3dc getBitcodeModuleImpl(std::unique_ptr >, llvm::StringRef, (anonymous namespace)::BitcodeReader*, llvm::LLVMContext&, bool, bool) (/workspace/llvm-dev/build-bcconv/bin/llvm-dis+0x42c3dc) +#14 0x42c667 llvm::getStreamedBitcodeModule(llvm::StringRef, std::unique_ptr >, llvm::LLVMContext&, std::function) (/workspace/llvm-dev/build-bcconv/bin/llvm-dis+0x42c667) +#15 0x406132 main (/workspace/llvm-dev/build-bcconv/bin/llvm-dis+0x406132) +#16 0x7f272cc1cec5 __libc_start_main /build/buildd/eglibc-2.19/csu/libc-start.c:321:0 +#17 0x4045f9 _start (/workspace/llvm-dev/build-bcconv/bin/llvm-dis+0x4045f9) 
+Stack dump: +0. Program arguments: llvm-dis -disable-output crash5.bc +Aborted + Index: test/Convert/crash6.err =================================================================== --- /dev/null +++ test/Convert/crash6.err @@ -0,0 +1,26 @@ +$ llvm-dis -disable-output crash6.bc +llvm-dis: /workspace/llvm-dev/llvm/include/llvm/IR/Type.h:106: void llvm::Type::setSubclassData(unsigned int): Assertion `getSubclassData() == val && "Subclass data too large for field"' failed. +#0 0x5eeeac llvm::sys::PrintStackTrace(llvm::raw_ostream&) (/workspace/llvm-dev/build-bcconv/bin/llvm-dis+0x5eeeac) +#1 0x5ef1c1 PrintStackTraceSignalHandler(void*) (/workspace/llvm-dev/build-bcconv/bin/llvm-dis+0x5ef1c1) +#2 0x5ed93d llvm::sys::RunSignalHandlers() (/workspace/llvm-dev/build-bcconv/bin/llvm-dis+0x5ed93d) +#3 0x5ede14 SignalHandler(int) (/workspace/llvm-dev/build-bcconv/bin/llvm-dis+0x5ede14) +#4 0x7f42b671f340 __restore_rt (/lib/x86_64-linux-gnu/libpthread.so.0+0x10340) +#5 0x7f42b5b60cc9 gsignal /build/buildd/eglibc-2.19/signal/../nptl/sysdeps/unix/sysv/linux/raise.c:56:0 +#6 0x7f42b5b640d8 abort /build/buildd/eglibc-2.19/stdlib/abort.c:91:0 +#7 0x7f42b5b59b86 __assert_fail_base /build/buildd/eglibc-2.19/assert/assert.c:92:0 +#8 0x7f42b5b59c32 (/lib/x86_64-linux-gnu/libc.so.6+0x2fc32) +#9 0x55be26 llvm::Type::setSubclassData(unsigned int) (/workspace/llvm-dev/build-bcconv/bin/llvm-dis+0x55be26) +#10 0x58c88b llvm::PointerType::PointerType(llvm::Type*, unsigned int) (/workspace/llvm-dev/build-bcconv/bin/llvm-dis+0x58c88b) +#11 0x58c833 llvm::PointerType::get(llvm::Type*, unsigned int) (/workspace/llvm-dev/build-bcconv/bin/llvm-dis+0x58c833) +#12 0x414ac3 (anonymous namespace)::BitcodeReader::parseTypeTableBody() (/workspace/llvm-dev/build-bcconv/bin/llvm-dis+0x414ac3) +#13 0x414445 (anonymous namespace)::BitcodeReader::parseTypeTable() (/workspace/llvm-dev/build-bcconv/bin/llvm-dis+0x414445) +#14 0x420138 (anonymous namespace)::BitcodeReader::parseModule(bool, bool) 
(/workspace/llvm-dev/build-bcconv/bin/llvm-dis+0x420138) +#15 0x4225af (anonymous namespace)::BitcodeReader::parseBitcodeInto(std::unique_ptr >, llvm::Module*, bool) (/workspace/llvm-dev/build-bcconv/bin/llvm-dis+0x4225af) +#16 0x42c3dc getBitcodeModuleImpl(std::unique_ptr >, llvm::StringRef, (anonymous namespace)::BitcodeReader*, llvm::LLVMContext&, bool, bool) (/workspace/llvm-dev/build-bcconv/bin/llvm-dis+0x42c3dc) +#17 0x42c667 llvm::getStreamedBitcodeModule(llvm::StringRef, std::unique_ptr >, llvm::LLVMContext&, std::function) (/workspace/llvm-dev/build-bcconv/bin/llvm-dis+0x42c667) +#18 0x406132 main (/workspace/llvm-dev/build-bcconv/bin/llvm-dis+0x406132) +#19 0x7f42b5b4bec5 __libc_start_main /build/buildd/eglibc-2.19/csu/libc-start.c:321:0 +#20 0x4045f9 _start (/workspace/llvm-dev/build-bcconv/bin/llvm-dis+0x4045f9) +Stack dump: +0. Program arguments: llvm-dis -disable-output crash6.bc +Aborted Index: test/Convert/foo.ll =================================================================== --- /dev/null +++ test/Convert/foo.ll @@ -0,0 +1,15 @@ +; ModuleID = 'send/foo.bc' + +define i32 @fib(i32 %p0) { + %v0 = icmp ult i32 %p0, 1 + br i1 %v0, label %true, label %false + +true: ; preds = %0 + ret i32 1 + +false: ; preds = %0 + %v2 = sub i32 %p0, 1 + %v3 = call i32 @fib(i32 %v2) + %v4 = add i32 %v3, %p0 + ret i32 %v4 +} Index: test/lit.cfg =================================================================== --- test/lit.cfg +++ test/lit.cfg @@ -227,6 +227,7 @@ r"\bllvm-ar\b", r"\bllvm-as\b", r"\bllvm-bcanalyzer\b", + r"\bllvm-bcconv\b", r"\bllvm-config\b", r"\bllvm-cov\b", r"\bllvm-cxxdump\b", Index: tools/LLVMBuild.txt =================================================================== --- tools/LLVMBuild.txt +++ tools/LLVMBuild.txt @@ -24,6 +24,7 @@ llvm-ar llvm-as llvm-bcanalyzer + llvm-bcconv llvm-cov llvm-diff llvm-dis Index: tools/Makefile =================================================================== --- tools/Makefile +++ tools/Makefile @@ -28,8 +28,8 
@@ # in parallel builds. Please retain this ordering. DIRS := llvm-config PARALLEL_DIRS := opt llvm-as llvm-dis llc llvm-ar llvm-nm llvm-link \ - lli llvm-extract llvm-mc bugpoint llvm-bcanalyzer llvm-diff \ - macho-dump llvm-objdump llvm-readobj llvm-rtdyld \ + lli llvm-extract llvm-mc bugpoint llvm-bcanalyzer llvm-bcconv \ + llvm-diff macho-dump llvm-objdump llvm-readobj llvm-rtdyld \ llvm-dwarfdump llvm-cov llvm-size llvm-stress llvm-mcmarkup \ llvm-profdata llvm-symbolizer obj2yaml yaml2obj llvm-c-test \ llvm-cxxdump verify-uselistorder dsymutil llvm-pdbdump Index: tools/llvm-bcconv/CMakeLists.txt =================================================================== --- /dev/null +++ tools/llvm-bcconv/CMakeLists.txt @@ -0,0 +1,10 @@ +set(LLVM_LINK_COMPONENTS + BitcodeConv + BitReader + BitWriter + Support + ) + +add_llvm_tool(llvm-bcconv + llvm-bcconv.cpp + ) Index: tools/llvm-bcconv/LLVMBuild.txt =================================================================== --- tools/llvm-bcconv/LLVMBuild.txt +++ tools/llvm-bcconv/LLVMBuild.txt @@ -1,4 +1,4 @@ -;===- ./lib/Bitcode/LLVMBuild.txt ------------------------------*- Conf -*--===; +;===- ./tools/llvm-bcconv/LLVMBuild.txt ------------------------*- Conf -*--===; ; ; The LLVM Compiler Infrastructure ; @@ -15,10 +15,8 @@ ; ;===------------------------------------------------------------------------===; -[common] -subdirectories = Reader Writer - [component_0] -type = Group -name = Bitcode -parent = Libraries +type = Tool +name = llvm-bcconv +parent = Tools +required_libraries = BitcodeConv BitReader BitWriter Support Index: tools/llvm-bcconv/Makefile =================================================================== --- tools/llvm-bcconv/Makefile +++ tools/llvm-bcconv/Makefile @@ -1,14 +1,17 @@ -##===- lib/Bitcode/Makefile --------------------------------*- Makefile -*-===## -# +##===- tools/llvm-bcconv/Makefile -------------------------*- Makefile -*-====## +# # The LLVM Compiler Infrastructure # # This file is 
distributed under the University of Illinois Open Source # License. See LICENSE.TXT for details. -# +# ##===----------------------------------------------------------------------===## -LEVEL = ../.. -PARALLEL_DIRS = Reader Writer +LEVEL := ../.. +TOOLNAME := llvm-bcconv +LINK_COMPONENTS := BitcodeConv BitReader BitWriter Support -include $(LEVEL)/Makefile.common +# This tool has no plugins, optimize startup time. +TOOL_NO_EXPORTS := 1 +include $(LEVEL)/Makefile.common
Index: tools/llvm-bcconv/llvm-bcconv.cpp
===================================================================
--- /dev/null
+++ tools/llvm-bcconv/llvm-bcconv.cpp
@@ -0,0 +1,182 @@
+//===-- llvm-bcconv.cpp - Bitcode format converter ------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This tool converts bitcode files from one format, to another. Formats
+// are binary, textual, and simplified. See file "llvm/Bitcode/BitcodeConvert.h"
+// for definitions of these bitcode file formats.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Bitcode/BitcodeConvert.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/ManagedStatic.h"
+#include "llvm/Support/PrettyStackTrace.h"
+#include "llvm/Support/Signals.h"
+#include "llvm/Support/ToolOutputFile.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+using namespace llvm::bcconv;
+
+namespace {
+
+// NOTE(review): throughout this file the template arguments of cl::opt,
+// SmallVector, SmallVectorImpl, make_unique, ErrorOr and unique_ptr (and any
+// angle-bracketed text inside string literals) were stripped when the patch
+// was extracted; they must be restored from the original review.
+
+// Positional input file name; "-" (the default) means standard input.
+static cl::opt
+    InputFilename(cl::Positional, cl::desc(""), cl::init("-"));
+
+// Output file name; "-" (the default) means standard output.
+static cl::opt OutputFilename(
+    "o", cl::desc("Write output to specified filename (rather than stdout)"),
+    cl::init("-"));
+
+// Form to read/write.
+enum BitcodeForm {
+  Binary,
+  Textual,
+  Simplified
+};
+
+// Required selector for the form of the input (-from-binary, -from-text or
+// -from-simplified).
+cl::opt InputForm(
+    cl::Required, cl::desc("Input bitcode form:"),
+    cl::values(clEnumValN(Binary, "from-binary",
+                          "Read a binary bitcode file"),
+               clEnumValN(Textual, "from-text",
+                          "Read a textual bitcode file (not .ll file)"),
+               clEnumValN(Simplified, "from-simplified",
+                          "Read a simplified bitcode file"),
+               clEnumValEnd));
+
+// Required selector for the form of the output (-to-binary, -to-text or
+// -to-simplified).
+cl::opt OutputForm(
+    cl::Required, cl::desc("Output bitcode form:"),
+    cl::values(clEnumValN(Binary, "to-binary",
+                          "Write a binary bitcode file"),
+               clEnumValN(Textual, "to-text",
+                          "Write a textual bitcode file (not .ll file)"),
+               clEnumValN(Simplified, "to-simplified",
+                          "Write a simplified bitcode file"),
+               clEnumValEnd));
+
+// Passed to readTextualBitcode as its AllowComments argument.
+static cl::opt
+    AllowTextComments("allow-comments",
+                      cl::desc("Allow comments in textual bitcode"),
+                      cl::init(false));
+
+// Passed to the writers as their ErrorRecover argument.
+static cl::opt ErrorRecover("error-recover",
+                            cl::desc("Try to recover on errors"),
+                            cl::init(false));
+
+// The three options below override the action bytes of the simplified form;
+// each defaults to the corresponding SimplifiedBitcodeActions constant.
+static cl::opt AcceptValueFlag(
+    "accept-value",
+    cl::desc("Value defining value acceptance in simplified form"),
+    cl::init(SimplifiedBitcodeActions::AcceptValueDefault));
+
+static cl::opt AcceptRecordFlag(
+    "accept-record",
+    cl::desc("Value defining record acceptance for simplified form"),
+    cl::init(SimplifiedBitcodeActions::AcceptRecordDefault));
+
+static cl::opt AcceptEscapeFlag(
+    "escape-byte",
+    cl::desc("Value defining byte escape for simplified form"),
+    cl::init(SimplifiedBitcodeActions::AcceptEscapeDefault));
+
+// Writes the bytes of Output to OutputFilename, opened with OpenFlags.
+// Returns false (after printing the error message) if the file cannot be
+// opened, true otherwise.
+static bool writeOutput(SmallVectorImpl &Output,
+                        sys::fs::OpenFlags OpenFlags) {
+  std::error_code EC;
+  auto FDOut = make_unique(OutputFilename, EC, OpenFlags);
+  if (EC) {
+    errs() << EC.message() << '\n';
+    return false;
+  }
+  for (auto Val : Output)
+    FDOut->os() << Val;
+  // Keep the output file now that everything has been written.
+  FDOut->keep();
+  return true;
+}
+
+// Maps a success flag to a process exit status: true -> 0, false -> 1.
+inline static int convertToExitStatus(bool Value) { return !Value; }
+
+// Copies the action bytes chosen on the command line into Actions.
+void initializeSimplifiedBitcodeActions(SimplifiedBitcodeActions &Actions) {
+  Actions.AcceptValue = AcceptValueFlag;
+  Actions.AcceptRecord = AcceptRecordFlag;
+  Actions.AcceptEscape = AcceptEscapeFlag;
+}
+
+} // end of anonymous namespace
+
+// Reads the input in the requested form into a list of bitcode records, then
+// writes those records out in the requested output form. Returns 0 on success
+// and 1 on any read, conversion, or write failure.
+int main(int argc, char *argv[]) {
+  // Print a stack trace if we signal out.
+  sys::PrintStackTraceOnErrorSignal();
+  PrettyStackTraceProgram X(argc, argv);
+  llvm_shutdown_obj Y; // Call llvm_shutdown() on exit.
+  cl::ParseCommandLineOptions(argc, argv,
+                              "llvm-bcconv: Bitcode format converter\n");
+
+  ErrorOr> ErrOrInput =
+      MemoryBuffer::getFileOrSTDIN(InputFilename);
+  if (std::error_code EC = ErrOrInput.getError()) {
+    errs() << "Error reading " << InputFilename << ": " << EC.message() << "\n";
+    return 1;
+  }
+
+  // Take ownership of the buffer; InputText is a view into it.
+  std::unique_ptr Input(ErrOrInput.get().release());
+  StringRef InputText(Input->getBuffer());
+  BitcodeRecordList Records;
+  std::error_code EC;
+  // Step 1: parse the input into Records.
+  switch (InputForm) {
+  case Binary:
+    EC = readBinaryBitcode(InputText, Records);
+    break;
+  case Textual:
+    EC = readTextualBitcode(InputText, Records, AllowTextComments);
+    break;
+  case Simplified: {
+    SimplifiedBitcodeActions Actions;
+    initializeSimplifiedBitcodeActions(Actions);
+    EC = readSimplifiedBitcode(InputText, Records, Actions);
+    break;
+  }
+  }
+
+  if (EC) {
+    errs() << "Error: " << EC.message() << "\n";
+    return convertToExitStatus(false);
+  }
+
+  // Step 2: serialize Records into Output in the requested form.
+  SmallVector Output;
+  std::string ErrorMessages;
+  bool WriteSuccessful = false;
+  switch (OutputForm) {
+  case Binary:
+    WriteSuccessful = writeBinaryBitcode(Output, Records, ErrorMessages,
+                                         ErrorRecover);
+    break;
+  case Textual:
+    WriteSuccessful = writeTextualBitcode(Output, Records, ErrorMessages,
+                                          ErrorRecover);
+    break;
+  case Simplified: {
+    SimplifiedBitcodeActions Actions;
+    initializeSimplifiedBitcodeActions(Actions);
+    WriteSuccessful = writeSimplifiedBitcode(Output, Records, Actions,
+                                             ErrorMessages, ErrorRecover);
+    break;
+  }
+  }
+
+  // Messages are printed even when the write succeeded.
+  if (!ErrorMessages.empty())
+    errs() << ErrorMessages;
+
+  if (!WriteSuccessful) {
+    return convertToExitStatus(false);
+  }
+
+  // Step 3: write the bytes out; only the textual form is opened as text.
+  return convertToExitStatus(writeOutput(
+      Output, OutputForm == Textual ? sys::fs::F_Text : sys::fs::F_None));
+}