diff --git a/llvm/docs/BitCodeFormat.rst b/llvm/docs/BitCodeFormat.rst --- a/llvm/docs/BitCodeFormat.rst +++ b/llvm/docs/BitCodeFormat.rst @@ -358,6 +358,18 @@ in the mapped in file and poke directly at it. A blob may only occur as the last operand of an abbreviation. +* Extended Array (code 6): This field is an array of tuples. The operand's extra + data specifies the number of tuple fields minus 1. The operand is followed + by operands describing each field of the tuple. Extended arrays can appear + anywhere in an abbreviation. + + Nested extended arrays are currently not supported, but are a possible + extension. + + When reading an extended array in an abbreviated record, the first integer is + a vbr6 that indicates the array length, followed by the encoded elements of + the array. Each element is a tightly packed encoding of the tuple fields. + For example, target triples in LLVM modules are encoded as a record of the form ``[TRIPLE, 'a', 'b', 'c', 'd']``. Consider if the bitstream emitted the following abbrev entry: diff --git a/llvm/include/llvm/Bitstream/BitCodes.h b/llvm/include/llvm/Bitstream/BitCodes.h --- a/llvm/include/llvm/Bitstream/BitCodes.h +++ b/llvm/include/llvm/Bitstream/BitCodes.h @@ -36,16 +36,16 @@ unsigned Enc : 3; // The encoding to use. public: enum Encoding { - Fixed = 1, // A fixed width field, Val specifies number of bits. - VBR = 2, // A VBR field where Val specifies the width of each chunk. - Array = 3, // A sequence of fields, next field species elt encoding. - Char6 = 4, // A 6-bit fixed field which maps to [a-zA-Z0-9._]. - Blob = 5 // 32-bit aligned array of 8-bit characters. + Fixed = 1, // A fixed width field, Val specifies number of bits. + VBR = 2, // A VBR field where Val specifies the width of each chunk. + Array = 3, // A sequence of fields, next field specifies elt encoding. + Char6 = 4, // A 6-bit fixed field which maps to [a-zA-Z0-9._]. + Blob = 5, // 32-bit aligned array of 8-bit characters. 
+ ExtArray = 6, // A VBR6-encoded element count followed by Val+1 fields + // describing the element encoding }; - static bool isValidEncoding(uint64_t E) { - return E >= 1 && E <= 5; - } + static bool isValidEncoding(uint64_t E) { return E >= 1 && E <= 6; } explicit BitCodeAbbrevOp(uint64_t V) : Val(V), IsLiteral(true) {} explicit BitCodeAbbrevOp(Encoding E, uint64_t Data = 0) @@ -69,6 +69,7 @@ switch (E) { case Fixed: case VBR: + case ExtArray: return true; case Array: case Char6: diff --git a/llvm/include/llvm/Bitstream/BitstreamWriter.h b/llvm/include/llvm/Bitstream/BitstreamWriter.h --- a/llvm/include/llvm/Bitstream/BitstreamWriter.h +++ b/llvm/include/llvm/Bitstream/BitstreamWriter.h @@ -428,6 +428,23 @@ for (unsigned e = Vals.size(); RecordIdx != e; ++RecordIdx) EmitAbbreviatedField(EltEnc, Vals[RecordIdx]); } + } else if (Op.getEncoding() == BitCodeAbbrevOp::ExtArray) { + // Extended array case. + unsigned NumFields = Op.getEncodingData() + 1; + assert(i + NumFields < e && "missing array field specifiers"); + + unsigned NumElements = Vals[RecordIdx++]; + EmitVBR(NumElements, 6); + + for (; NumElements; --NumElements) { + for (unsigned field = 0; field != NumFields; ++field, ++RecordIdx) { + assert(RecordIdx < Vals.size()); + const BitCodeAbbrevOp &Enc = Abbv->getOperandInfo(i + field + 1); + EmitAbbreviatedField(Enc, Vals[RecordIdx]); + } + } + + i += NumFields; } else if (Op.getEncoding() == BitCodeAbbrevOp::Blob) { // If this record has blob data, emit it, otherwise we must have record // entries to encode this way. @@ -443,7 +460,7 @@ } else { emitBlob(Vals.slice(RecordIdx)); } - } else { // Single scalar field. + } else { // Single scalar field. 
assert(RecordIdx < Vals.size() && "Invalid abbrev/record"); EmitAbbreviatedField(Op, Vals[RecordIdx]); ++RecordIdx; diff --git a/llvm/include/llvm/IR/StructuredData.h b/llvm/include/llvm/IR/StructuredData.h --- a/llvm/include/llvm/IR/StructuredData.h +++ b/llvm/include/llvm/IR/StructuredData.h @@ -72,6 +72,39 @@ } }; +/// Describes the "schema" of a field of structured data. +/// +/// This is used to describe structures for bitcode abbreviation. +class SchemaField { +public: + enum class Type { + /// Fixed-width APInt (possibly a boolean). TypeData is the number of bits. + Int, + + /// LLVM type + Type, + }; + +private: + StringRef TheKey; + Type TheType; + unsigned TypeData; + +public: + SchemaField(StringRef K, Type T, unsigned TD = 0) + : TheKey(K), TheType(T), TypeData(TD) { + assert((T != Type::Int || TD != 0) && + "integer schema types must have a bit width"); + } + + StringRef getKey() const { return TheKey; } + Type getType() const { return TheType; } + unsigned getTypeBitWidth() const { + assert(TheType == Type::Int); + return TypeData; + } +}; + // Convenience function to create an Error object when an error is encountered // while deserializing structured data. Error makeDeserializeError(const Twine &Msg); diff --git a/llvm/include/llvm/IR/TargetExtType.h b/llvm/include/llvm/IR/TargetExtType.h --- a/llvm/include/llvm/IR/TargetExtType.h +++ b/llvm/include/llvm/IR/TargetExtType.h @@ -98,9 +98,13 @@ Expected finish(); }; +/// Return the schema for target type info. +ArrayRef getTargetTypeInfoSchema(); + /// Serialize the target type info into structured data. /// -/// If UseSchema is true, fields are generated according to a fixed schema. +/// If UseSchema is true, fields are generated according to the schema returned +/// by @ref getTargetTypeInfoSchema. 
SmallVector> serializeTargetTypeInfo(TargetExtType *Ty, bool UseSchema); diff --git a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp --- a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp +++ b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp @@ -213,6 +213,8 @@ protected: void writePerModuleGlobalValueSummary(); + void encodeSchemaAbbrev(BitCodeAbbrev &Abbrev, + ArrayRef Fields); void encodeStructuredData(SmallVectorImpl &Vals, ArrayRef> Fields); @@ -951,6 +953,9 @@ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, NumBits)); unsigned ArrayAbbrev = Stream.EmitAbbrev(std::move(Abbv)); + // Abbrev for TYPE_CODE_TARGET_TYPE -- only emitted when used. + unsigned TargetAbbrev = 0; + // Emit an entry count so the reader can reserve space. TypeVals.push_back(TypeList.size()); Stream.EmitRecord(bitc::TYPE_CODE_NUMENTRY, TypeVals); @@ -1067,8 +1072,20 @@ for (unsigned IntParam : TET->int_params()) TypeVals.push_back(IntParam); - auto Fields = serializeTargetTypeInfo(TET, /*UseSchema=*/false); + auto Fields = serializeTargetTypeInfo(TET, /*UseSchema=*/true); encodeStructuredData(TypeVals, Fields); + + if (!TargetAbbrev) { + Abbv = std::make_shared(); + Abbv->Add(BitCodeAbbrevOp(bitc::TYPE_CODE_TARGET_TYPE)); + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::ExtArray, 0)); // types + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, NumBits)); + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::ExtArray, 0)); // ints + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); + encodeSchemaAbbrev(*Abbv, getTargetTypeInfoSchema()); + TargetAbbrev = Stream.EmitAbbrev(std::move(Abbv)); + } + AbbrevToUse = TargetAbbrev; break; } case Type::TypedPointerTyID: @@ -3977,6 +3994,31 @@ } } +void ModuleBitcodeWriterBase::encodeSchemaAbbrev( + BitCodeAbbrev &Abbrev, ArrayRef Fields) { + Abbrev.Add(BitCodeAbbrevOp(Fields.size())); + for (const auto &Field : Fields) { + Abbrev.Add(BitCodeAbbrevOp(StrtabBuilder.add(Field.getKey()))); + 
Abbrev.Add(BitCodeAbbrevOp(Field.getKey().size())); + + if (Field.getType() == sdata::SchemaField::Type::Int) { + unsigned NumBits = Field.getTypeBitWidth(); + Abbrev.Add(BitCodeAbbrevOp(bitc::SDATA_INT_BASE + NumBits)); + while (NumBits > 64) { + Abbrev.Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 64)); + NumBits -= 64; + } + Abbrev.Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, NumBits)); + } else if (Field.getType() == sdata::SchemaField::Type::Type) { + Abbrev.Add(BitCodeAbbrevOp(bitc::SDATA_TYPE)); + Abbrev.Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, + VE.computeBitsRequiredForTypeIndicies())); + } else { + llvm_unreachable("unimplemented schema field type"); + } + } +} + void ModuleBitcodeWriterBase::encodeStructuredData( SmallVectorImpl &Vals, ArrayRef> Fields) { diff --git a/llvm/lib/Bitstream/Reader/BitstreamReader.cpp b/llvm/lib/Bitstream/Reader/BitstreamReader.cpp --- a/llvm/lib/Bitstream/Reader/BitstreamReader.cpp +++ b/llvm/lib/Bitstream/Reader/BitstreamReader.cpp @@ -75,6 +75,7 @@ // Decode the value as we are commanded. switch (Op.getEncoding()) { case BitCodeAbbrevOp::Array: + case BitCodeAbbrevOp::ExtArray: case BitCodeAbbrevOp::Blob: llvm_unreachable("Should not reach here"); case BitCodeAbbrevOp::Fixed: @@ -123,6 +124,7 @@ Code = CodeOp.getLiteralValue(); else { if (CodeOp.getEncoding() == BitCodeAbbrevOp::Array || + CodeOp.getEncoding() == BitCodeAbbrevOp::ExtArray || CodeOp.getEncoding() == BitCodeAbbrevOp::Blob) return llvm::createStringError( std::errc::illegal_byte_sequence, @@ -139,6 +141,7 @@ continue; if (Op.getEncoding() != BitCodeAbbrevOp::Array && + Op.getEncoding() != BitCodeAbbrevOp::ExtArray && Op.getEncoding() != BitCodeAbbrevOp::Blob) { if (Expected MaybeField = readAbbreviatedField(*this, Op)) continue; @@ -186,6 +189,38 @@ continue; } + if (Op.getEncoding() == BitCodeAbbrevOp::ExtArray) { + // Extended array case. 
+ unsigned NumFields = Op.getEncodingData() + 1; + if (i + NumFields >= e) + return error("Missing array element specifiers"); + + Expected MaybeNumElts = ReadVBR(6); + if (!MaybeNumElts) { + return error( + ("Failed to read size: " + toString(MaybeNumElts.takeError())) + .c_str()); + } + uint32_t NumElts = MaybeNumElts.get(); + if (NumElts > std::numeric_limits::max() / NumFields) + return error("Size overflow"); + + size_t NumVals = (size_t)NumElts * NumFields; + if (!isSizePlausible(NumVals)) + return error("Size is not plausible"); + + for (; NumElts; --NumElts) { + for (unsigned field = 0; field != NumFields; ++field) { + const BitCodeAbbrevOp &Enc = Abbv->getOperandInfo(i + field + 1); + if (Error Err = readAbbreviatedField(*this, Enc).takeError()) + return std::move(Err); + } + } + + i += NumFields; + continue; + } + assert(Op.getEncoding() == BitCodeAbbrevOp::Blob); // Blob case. Read the number of bytes as a vbr6. Expected MaybeNum = ReadVBR(6); @@ -250,6 +285,7 @@ Code = CodeOp.getLiteralValue(); else { if (CodeOp.getEncoding() == BitCodeAbbrevOp::Array || + CodeOp.getEncoding() == BitCodeAbbrevOp::ExtArray || CodeOp.getEncoding() == BitCodeAbbrevOp::Blob) return error("Abbreviation starts with an Array or a Blob"); if (Expected MaybeCode = readAbbreviatedField(*this, CodeOp)) @@ -266,6 +302,7 @@ } if (Op.getEncoding() != BitCodeAbbrevOp::Array && + Op.getEncoding() != BitCodeAbbrevOp::ExtArray && Op.getEncoding() != BitCodeAbbrevOp::Blob) { if (Expected MaybeVal = readAbbreviatedField(*this, Op)) Vals.push_back(MaybeVal.get()); @@ -324,6 +361,43 @@ continue; } + if (Op.getEncoding() == BitCodeAbbrevOp::ExtArray) { + // Extended array case. 
+ unsigned NumFields = Op.getEncodingData() + 1; + if (i + NumFields >= e) + return error("Missing array element specifiers"); + + Expected MaybeNumElts = ReadVBR(6); + if (!MaybeNumElts) { + return error( + ("Failed to read size: " + toString(MaybeNumElts.takeError())) + .c_str()); + } + uint32_t NumElts = MaybeNumElts.get(); + if (NumElts > std::numeric_limits::max() / NumFields) + return error("Size overflow"); + + size_t NumVals = (size_t)NumElts * NumFields; + if (!isSizePlausible(NumVals)) + return error("Size is not plausible"); + Vals.reserve(Vals.size() + NumVals); + + Vals.push_back(NumElts); + + for (; NumElts; --NumElts) { + for (unsigned field = 0; field != NumFields; ++field) { + const BitCodeAbbrevOp &Enc = Abbv->getOperandInfo(i + field + 1); + if (Expected MaybeVal = readAbbreviatedField(*this, Enc)) + Vals.push_back(MaybeVal.get()); + else + return MaybeVal.takeError(); + } + } + + i += NumFields; + continue; + } + assert(Op.getEncoding() == BitCodeAbbrevOp::Blob); // Blob case. Read the number of bytes as a vbr6. 
Expected MaybeNumElts = ReadVBR(6); diff --git a/llvm/lib/IR/Type.cpp b/llvm/lib/IR/Type.cpp --- a/llvm/lib/IR/Type.cpp +++ b/llvm/lib/IR/Type.cpp @@ -857,6 +857,23 @@ // TargetExtType Implementation //===----------------------------------------------------------------------===// +namespace { + +struct TargetTypeInfoSchema { + sdata::SchemaField Schema[3] = { + {"layout", sdata::SchemaField::Type::Type}, + {"canBeGlobal", sdata::SchemaField::Type::Int, 1}, + {"hasZeroInit", sdata::SchemaField::Type::Int, 1}, + }; + + static const TargetTypeInfoSchema &get() { + static const TargetTypeInfoSchema TTIS; + return TTIS; + } +}; + +} // anonymous namespace + TargetTypeInfoDeserialize::TargetTypeInfoDeserialize(LLVMContext &Ctx, StringRef Name, ArrayRef Types, @@ -926,6 +943,11 @@ return T; } +ArrayRef llvm::getTargetTypeInfoSchema() { + const auto &TTIS = TargetTypeInfoSchema::get(); + return TTIS.Schema; +} + SmallVector> llvm::serializeTargetTypeInfo(TargetExtType *Ty, bool UseSchema) { SmallVector> Fields;