diff --git a/llvm/docs/BitCodeFormat.rst b/llvm/docs/BitCodeFormat.rst --- a/llvm/docs/BitCodeFormat.rst +++ b/llvm/docs/BitCodeFormat.rst @@ -552,6 +552,28 @@ * 23 --- `STRTAB_BLOCK`_ --- The bitcode file's string table. +Structured Data +^^^^^^^^^^^^^^^ + +Structured data, i.e., sequences of (key, value) pairs, is embedded into some +records by encoding it into a sequence of 64-bit integers. + +The encoding begins with the number of pairs, followed by the encoding of each +pair. Each (key, value) pair is encoded as a structured data symbol encoding +the key followed by a type discriminator indicating the type of the value, +followed by an encoding of the value itself. + +The following type discriminators are defined: + +* INT (1) --- encoded as a single 64-bit integer + +* TYPE (2) --- encoded as a single 64-bit integer; if 0, indicates a missing + type (nullptr); otherwise, a 1-based index into the type table + +* SYMBOL (3) --- encoded as a pair of 64-bit integers, the first one being an + offset into the IR module's string table, and the second being the length of + the symbol string + .. _MODULE_BLOCK: MODULE_BLOCK Contents @@ -1338,21 +1360,41 @@ The ``X86_AMX`` record (code 24) adds an ``x86_amx`` type to the type table. +TYPE_CODE_TARGET_TYPE_OLD Record +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +``[TARGET_TYPE_OLD, num_tys, ...ty_params..., ...int_params... ]`` + +The ``TARGET_TYPE_OLD`` record (code 26) adds a target extension type to the +type table, with a name defined by a previously encountered ``STRUCT_NAME`` +record. The operand fields are + +* *num_tys*: The number of parameters that are types (as opposed to integers) + +* *ty_params*: Type indices that represent type parameters + +* *int_params*: Numbers that correspond to the integer parameters. + TYPE_CODE_TARGET_TYPE Record ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -``[TARGET_TYPE, num_tys, ...ty_params..., ...int_params... 
]`` +``[TARGET_TYPE, num_tys, ...ty_params..., num_ints, ...int_params..., + structured data ]`` -The ``TARGET_TYPE`` record (code 26) adds a target extension type to the type -table, with a name defined by a previously encountered ``STRUCT_NAME`` record. -The operand fields are +The ``TARGET_TYPE`` record (code 27) adds a target extension type to the +type table, with a name defined by a previously encountered ``STRUCT_NAME`` +record. The operand fields are * *num_tys*: The number of parameters that are types (as opposed to integers) * *ty_params*: Type indices that represent type parameters +* *num_ints*: The number of parameters that are integers + * *int_params*: Numbers that correspond to the integer parameters. +* The structured data is deserialized into target type info + .. _CONSTANTS_BLOCK: CONSTANTS_BLOCK Contents diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -3785,13 +3785,6 @@ meanings of name and parameters are defined by the target. When being defined in LLVM IR, all of the type parameters must precede all of the integer parameters. -Specific target extension types are registered with LLVM as having specific -properties. These properties can be used to restrict the type from appearing in -certain contexts, such as being the type of a global variable or having a -``zeroinitializer`` constant be valid. A complete list of type properties may be -found in the documentation for ``llvm::TargetExtType::Property`` (`doxygen -`_). - :Syntax: .. code-block:: llvm @@ -3802,6 +3795,34 @@ target("label", 0, 1, 2) target("label", void, i32, 0, 1, 2) +Target extension types can be registered with LLVM as having specific +properties. These properties can be used to restrict the type from appearing in +certain contexts, such as being the type of a global variable or having a +``zeroinitializer`` constant be valid. 
A complete list of type properties may be +found in the documentation for ``llvm::TargetExtType::Property`` (`doxygen +`_). + +The registration of these properties may occur at runtime by defining an +``llvm::TargetExtTypeClass`` object and registering it with the ``LLVMContext``. +Some target extension types are registered in this way in every ``LLVMContext``. + +The properties can also be set with a special ``type`` block of +:ref:`structured data ` at the beginning of the module file. + +:Syntax: + +:: + + type + +:Example: + +.. code-block:: llvm + + type target("mytype") { + layout: type i8, + hasZeroInit: true, + } .. _t_vector: diff --git a/llvm/include/llvm/AsmParser/LLParser.h b/llvm/include/llvm/AsmParser/LLParser.h --- a/llvm/include/llvm/AsmParser/LLParser.h +++ b/llvm/include/llvm/AsmParser/LLParser.h @@ -439,6 +439,9 @@ bool parseArrayVectorType(Type *&Result, bool IsVector); bool parseFunctionType(Type *&Result); + bool parseTargetExtTypeImpl(std::string &Name, + SmallVectorImpl &TypeParams, + SmallVectorImpl &IntParams); bool parseTargetExtType(Type *&Result); // Function Semantic Analysis. 
@@ -647,6 +650,8 @@ bool parseUseListOrderBB(); bool parseUseListOrderIndexes(SmallVectorImpl &Indexes); bool sortUseListOrder(Value *V, ArrayRef Indexes, SMLoc Loc); + + bool parseTypeInfo(); }; } // End llvm namespace diff --git a/llvm/include/llvm/Bitcode/LLVMBitCodes.h b/llvm/include/llvm/Bitcode/LLVMBitCodes.h --- a/llvm/include/llvm/Bitcode/LLVMBitCodes.h +++ b/llvm/include/llvm/Bitcode/LLVMBitCodes.h @@ -176,7 +176,9 @@ TYPE_CODE_OPAQUE_POINTER = 25, // OPAQUE_POINTER: [addrspace] - TYPE_CODE_TARGET_TYPE = 26, // TARGET_TYPE + TYPE_CODE_TARGET_TYPE_OLD = 26, // TARGET_TYPE without type info (old) + + TYPE_CODE_TARGET_TYPE = 27, // TARGET_TYPE }; enum OperandBundleTagCode { @@ -731,6 +733,12 @@ SYMTAB_BLOB = 1, }; +enum StructuredDataCodes { + SDATA_INT = 1, + SDATA_TYPE = 2, + SDATA_SYMBOL = 3, +}; + } // End bitc namespace } // End llvm namespace diff --git a/llvm/include/llvm/IR/DerivedTypes.h b/llvm/include/llvm/IR/DerivedTypes.h --- a/llvm/include/llvm/IR/DerivedTypes.h +++ b/llvm/include/llvm/IR/DerivedTypes.h @@ -32,6 +32,7 @@ class Value; class APInt; class LLVMContext; +class TargetExtTypeClass; /// Class to represent integer types. Note that this class is also used to /// represent the built-in integer types: Int1Ty, Int8Ty, Int16Ty, Int32Ty and @@ -737,9 +738,18 @@ /// integer parameters. The exact meaning of any parameters is dependent on the /// target. class TargetExtType : public Type { + friend class TargetTypeInfoDeserialize; + TargetExtType(LLVMContext &C, StringRef Name, ArrayRef Types, ArrayRef Ints); - + static std::pair getInternal(LLVMContext &C, + StringRef Name, + ArrayRef Types, + ArrayRef Ints); + void initFromClass(const TargetExtTypeClass *Class); + + Type *LayoutType = nullptr; + uint64_t Properties = 0; // These strings are ultimately owned by the context. 
StringRef Name; unsigned *IntParams; @@ -754,6 +764,22 @@ ArrayRef Types = std::nullopt, ArrayRef Ints = std::nullopt); + /// Return a target extension type having the specified name and optional + /// type and integer parameters, but also run the type through the verifier + /// of the corresponding type class, if any is registered, and return null + /// if verification fails. + static TargetExtType *getChecked(LLVMContext &Context, StringRef Name, + ArrayRef Types = std::nullopt, + ArrayRef Ints = std::nullopt, + raw_ostream &Errs = llvm::errs()); + + /// Return a target extension type of a known type class, having the specified + /// name and optional type and integer parameters. + static TargetExtType *get(LLVMContext &Context, + const TargetExtTypeClass *Class, StringRef Name, + ArrayRef Types = std::nullopt, + ArrayRef Ints = std::nullopt); + /// Return the name for this target extension type. Two distinct target /// extension types may have the same name if their type or integer parameters /// differ. @@ -791,13 +817,16 @@ }; /// Returns true if the target extension type contains the given property. - bool hasProperty(Property Prop) const; + bool hasProperty(Property Prop) const { return (Properties & Prop) == Prop; } + + /// Returns the properties bit field. + uint64_t getProperties() const { return Properties; } /// Returns an underlying layout type for the target extension type. This /// type can be used to query size and alignment information, if it is /// appropriate (although note that the layout type may also be void). It is /// not legal to bitcast between this type and the layout type, however. - Type *getLayoutType() const; + Type *getLayoutType() const { return LayoutType; } /// Methods for support type inquiry through isa, cast, and dyn_cast. 
static bool classof(const Type *T) { return T->getTypeID() == TargetExtTyID; } diff --git a/llvm/include/llvm/IR/LLVMContext.h b/llvm/include/llvm/IR/LLVMContext.h --- a/llvm/include/llvm/IR/LLVMContext.h +++ b/llvm/include/llvm/IR/LLVMContext.h @@ -34,6 +34,7 @@ template class SmallVectorImpl; template class StringMapEntry; class StringRef; +class TargetExtTypeClass; class Twine; class LLVMRemarkStreamer; @@ -320,6 +321,12 @@ /// Whether typed pointers are supported. If false, all pointers are opaque. bool supportsTypedPointers() const; + /// Register a custom extension type class. + void registerTargetExtTypeClass(const TargetExtTypeClass *TypeClass); + + /// Find an extension type class. + const TargetExtTypeClass *findTargetExtTypeClass(StringRef Name) const; + private: // Module needs access to the add/removeModule methods. friend class Module; diff --git a/llvm/include/llvm/IR/StructuredData.h b/llvm/include/llvm/IR/StructuredData.h --- a/llvm/include/llvm/IR/StructuredData.h +++ b/llvm/include/llvm/IR/StructuredData.h @@ -40,6 +40,7 @@ /// should be added to @ref Value. class Symbol { private: + friend class RegisterSymbol; friend class SymbolTableLockGuard; unsigned Id = 0; diff --git a/llvm/include/llvm/IR/TargetExtType.h b/llvm/include/llvm/IR/TargetExtType.h new file mode 100644 --- /dev/null +++ b/llvm/include/llvm/IR/TargetExtType.h @@ -0,0 +1,111 @@ +//===- llvm/TargetExtType.h - Target extension types ------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file contains the declarations of classes that are needed to *define* +// custom target extension types. Source files that only *use* target extension +// types only need to include DerivedTypes.h. 
+// +// The implementation of these classes lives in Type.cpp. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_IR_TARGETEXTTYPE_H +#define LLVM_IR_TARGETEXTTYPE_H + +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/StructuredData.h" + +namespace llvm { + +/// Description of a class of target extension types. +/// +/// This allows users to define custom "target" extension types which are not +/// actually recognized by any backend but are lowered away by LLVM IR-level +/// transforms that are custom to the particular user of LLVM. +/// +/// Type classes must be explicitly registered with the context(s) in which +/// they are used, before target extension types are used for the first time. +/// +/// Type classes have callback functions to verify extension types and fill +/// in their type info. These callback functions operate on "partially +/// initialized types", meaning that the type name and parameters are +/// initialized, but the type info isn't. For example, this means that +/// the callback functions must not use TargetExtType::hasProperty on the +/// argument type. (However, any extension types that appear in the type's +/// parameter list *are* fully initialized.) +/// +/// The lifetime of a TargetExtTypeClass object must extend beyond the lifetime +/// of any context in which it is registered. +struct TargetExtTypeClass { + /// Name (or prefix of names) of the types in the class. + std::string Name; + + /// Whether Name is only a prefix. Prefixes must end in '.' + bool NameIsPrefix; + + /// Given a partially initialized type, return the layout type (defaults to + /// null indicating that the type can't be laid out in memory). + using GetLayoutTypeFn = Type *(TargetExtType *T); + GetLayoutTypeFn *GetLayoutType = nullptr; + + /// Given a partially initialized type, return the type properties (defaults + /// to 0). 
+ using GetPropertiesFn = uint64_t(TargetExtType *T); + GetPropertiesFn *GetProperties = nullptr; + + /// Given a partially initialized type, check the type for validity and return + /// true if valid. + using VerifierFn = bool(TargetExtType *T, raw_ostream &Errs); + VerifierFn *Verifier = nullptr; + + TargetExtTypeClass(StringRef Name, bool NameIsPrefix = false) + : Name(Name), NameIsPrefix(NameIsPrefix) {} + + TargetExtTypeClass &setGetLayoutType(GetLayoutTypeFn *Fn) { + GetLayoutType = Fn; + return *this; + } + TargetExtTypeClass &setGetProperties(GetPropertiesFn *Fn) { + GetProperties = Fn; + return *this; + } + TargetExtTypeClass &setVerifier(VerifierFn *Fn) { + Verifier = Fn; + return *this; + } +}; + +class TargetTypeInfoDeserialize { + LLVMContext &Ctx; + std::string Name; + SmallVector Types; + SmallVector Ints; + + Type *LayoutType = nullptr; + uint64_t Properties = 0; + +public: + TargetTypeInfoDeserialize(LLVMContext &Ctx, StringRef Name, + ArrayRef Types, ArrayRef Ints); + + Error parseField(sdata::Symbol K, sdata::Value V); + + Expected finish(); + + static void registerSymbols(); +}; + +/// Serialize the target type info into structured data. +/// +/// If UseSchema is true, fields are generated according to a fixed schema. 
+SmallVector> +serializeTargetTypeInfo(TargetExtType *Ty, bool UseSchema); + +} // namespace llvm + +#endif // LLVM_IR_TARGETEXTTYPE_H diff --git a/llvm/lib/AsmParser/LLParser.cpp b/llvm/lib/AsmParser/LLParser.cpp --- a/llvm/lib/AsmParser/LLParser.cpp +++ b/llvm/lib/AsmParser/LLParser.cpp @@ -13,8 +13,8 @@ #include "llvm/AsmParser/LLParser.h" #include "llvm/ADT/APSInt.h" #include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/ScopeExit.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/ScopeExit.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/AsmParser/LLToken.h" #include "llvm/AsmParser/SlotMapping.h" @@ -38,6 +38,7 @@ #include "llvm/IR/Metadata.h" #include "llvm/IR/Module.h" #include "llvm/IR/Operator.h" +#include "llvm/IR/TargetExtType.h" #include "llvm/IR/Value.h" #include "llvm/IR/ValueSymbolTable.h" #include "llvm/Support/Casting.h" @@ -444,6 +445,10 @@ if (parseUseListOrderBB()) return true; break; + case lltok::kw_type: + if (parseTypeInfo()) + return true; + break; } } } @@ -3197,9 +3202,10 @@ return false; } -/// parseTargetExtType - handle target extension type syntax +/// parseTargetExtTypeImpl - handle target extension type syntax /// TargetExtType -/// ::= 'target' '(' STRINGCONSTANT TargetExtTypeParams TargetExtIntParams ')' +/// ::= 'target' '(' STRINGCONSTANT TargetExtTypeParams TargetExtIntParams +/// ')' /// /// TargetExtTypeParams /// ::= /*empty*/ @@ -3208,20 +3214,20 @@ /// TargetExtIntParams /// ::= /*empty*/ /// ::= ',' uint32 TargetExtIntParams -bool LLParser::parseTargetExtType(Type *&Result) { +bool LLParser::parseTargetExtTypeImpl(std::string &Name, + SmallVectorImpl &TypeParams, + SmallVectorImpl &IntParams) { + assert(Lex.getKind() == lltok::kw_target); Lex.Lex(); // Eat the 'target' keyword. // Get the mandatory type name. 
- std::string TypeName; if (parseToken(lltok::lparen, "expected '(' in target extension type") || - parseStringConstant(TypeName)) + parseStringConstant(Name)) return true; // Parse all of the integer and type parameters at the same time; the use of // SeenInt will allow us to catch cases where type parameters follow integer // parameters. - SmallVector TypeParams; - SmallVector IntParams; bool SeenInt = false; while (Lex.getKind() == lltok::comma) { Lex.Lex(); // Eat the comma. @@ -3247,7 +3253,23 @@ if (parseToken(lltok::rparen, "expected ')' in target extension type")) return true; - Result = TargetExtType::get(Context, TypeName, TypeParams, IntParams); + return false; +} + +bool LLParser::parseTargetExtType(Type *&Result) { + auto Loc = Lex.getLoc(); + std::string TypeName; + SmallVector TypeParams; + SmallVector IntParams; + if (parseTargetExtTypeImpl(TypeName, TypeParams, IntParams)) + return true; + + std::string ErrStr; + raw_string_ostream Err(ErrStr); + Result = + TargetExtType::getChecked(Context, TypeName, TypeParams, IntParams, Err); + if (!Result) + return error(Loc, Twine("target type failed validation:\n") + ErrStr); return false; } @@ -10149,3 +10171,32 @@ return false; } + +bool LLParser::parseTypeInfo() { + LocTy KwLoc = Lex.getLoc(); + assert(Lex.getKind() == lltok::kw_type); + Lex.Lex(); + + if (Lex.getKind() != lltok::kw_target) + return tokError("expected 'target' type"); + + std::string TypeName; + SmallVector TypeParams; + SmallVector IntParams; + if (parseTargetExtTypeImpl(TypeName, TypeParams, IntParams)) + return true; + + TargetTypeInfoDeserialize D(Context, TypeName, TypeParams, IntParams); + + if (parseStructuredData( + [&](LocTy KeyLoc, sdata::Symbol K, LocTy ValueLoc, sdata::Value V) { + if (Error Err = D.parseField(K, V)) + return error(KeyLoc, toString(std::move(Err))); + return false; + })) + return true; + + if (Error Err = D.finish().takeError()) + return error(KwLoc, toString(std::move(Err))); + return false; +} diff --git 
a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp --- a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp +++ b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp @@ -56,6 +56,8 @@ #include "llvm/IR/Module.h" #include "llvm/IR/ModuleSummaryIndex.h" #include "llvm/IR/Operator.h" +#include "llvm/IR/StructuredData.h" +#include "llvm/IR/TargetExtType.h" #include "llvm/IR/Type.h" #include "llvm/IR/Value.h" #include "llvm/IR/Verifier.h" @@ -564,6 +566,7 @@ class BitcodeReader : public BitcodeReaderBase, public GVMaterializer { LLVMContext &Context; + sdata::SymbolTableLockGuard SdataSymbols; Module *TheModule = nullptr; // Next offset to start scanning for lazy parsing of function bodies. uint64_t NextUnreadBit = 0; @@ -858,6 +861,10 @@ DenseMap::iterator DeferredFunctionInfoIterator); SyncScope::ID getDecodedSyncScopeID(unsigned Val); + + Error decodeStructuredData( + ArrayRef &Tail, + function_ref ParseField); }; /// Class to manage reading and parsing function summary index bitcode @@ -1323,6 +1330,56 @@ } } +Error BitcodeReader::decodeStructuredData( + ArrayRef &Tail, + function_ref ParseField) { + if (Tail.empty()) + return error("missing sdata field count"); + + uint64_t NumFields = Tail[0]; + Tail = Tail.drop_front(1); + + for (uint64_t i = 0; i != NumFields; ++i) { + if (Tail.size() < 4) + return error("incomplete sdata field"); + + auto K = SdataSymbols.getSymbol(Context, + Strtab.slice(Tail[0], Tail[0] + Tail[1])); + Tail = Tail.drop_front(2); + + sdata::Value V; + switch (Tail[0]) { + case bitc::SDATA_INT: + V = Tail[1]; + Tail = Tail.drop_front(2); + break; + case bitc::SDATA_TYPE: { + Type *T = nullptr; + if (Tail[1] != 0) + T = getTypeByID(Tail[1] - 1); + V = T; + Tail = Tail.drop_front(2); + break; + } + case bitc::SDATA_SYMBOL: { + if (Tail.size() < 3) + return error("incomplete sdata symbol value"); + V = SdataSymbols.getSymbol(Context, + Strtab.slice(Tail[1], Tail[1] + Tail[2])); + Tail = Tail.drop_front(3); + break; + } + default: 
+ return error("bad sdata value type: " + Twine(Tail[0])); + } + + if (Error Err = ParseField(K, V)) + return Err; + } + + return Error::success(); +} + Type *BitcodeReader::getTypeByID(unsigned ID) { // The type table size is always specified correctly. if (ID >= TypeList.size()) @@ -2502,7 +2559,10 @@ ResultTy = Res; break; } + case bitc::TYPE_CODE_TARGET_TYPE_OLD: case bitc::TYPE_CODE_TARGET_TYPE: { // TARGET_TYPE: [NumTy, Tys..., Ints...] + bool IsOld = MaybeRecord.get() == bitc::TYPE_CODE_TARGET_TYPE_OLD; + if (Record.size() < 1) return error("Invalid target extension type record"); @@ -2522,12 +2582,43 @@ return error("Invalid type"); } - for (unsigned i = NumTys + 1, e = Record.size(); i < e; i++) { - if (Record[i] > UINT_MAX) + ArrayRef Tail = ArrayRef(Record).drop_front(NumTys + 1); + unsigned NumInts; + + if (IsOld) { + NumInts = Tail.size(); + } else { + // The explicit integer count must be present and must not exceed the + // number of operands that follow it. + if (Tail.empty() || Tail[0] > Tail.size() - 1) + return error("Invalid target extension type record"); + NumInts = Tail[0]; + Tail = Tail.drop_front(1); + } + + for (uint64_t Val : Tail.take_front(NumInts)) { + if (Val > UINT_MAX) return error("Integer parameter too large"); - IntParams.push_back(Record[i]); + IntParams.push_back(Val); } - ResultTy = TargetExtType::get(Context, TypeName, TypeParams, IntParams); + + Tail = Tail.drop_front(NumInts); + + TargetTypeInfoDeserialize D(Context, TypeName, TypeParams, IntParams); + + if (!IsOld) { + if (Error Err = decodeStructuredData( + Tail, [&](sdata::Symbol K, sdata::Value V) { + return D.parseField(K, V); + })) + return Err; + } + + auto Result = D.finish(); + if (Error Err = Result.takeError()) + return Err; + + ResultTy = Result.get(); TypeName.clear(); break; } diff --git a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp --- a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp +++ b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp @@ -52,6 +52,8 @@ #include "llvm/IR/Module.h" #include "llvm/IR/ModuleSummaryIndex.h" #include "llvm/IR/Operator.h" +#include "llvm/IR/StructuredData.h" +#include "llvm/IR/TargetExtType.h" #include 
"llvm/IR/Type.h" #include "llvm/IR/UseListOrder.h" #include "llvm/IR/Value.h" @@ -211,6 +213,11 @@ protected: void writePerModuleGlobalValueSummary(); + void encodeSymbol(SmallVectorImpl &Vals, sdata::Symbol S); + void + encodeStructuredData(SmallVectorImpl &Vals, + ArrayRef> Fields); + private: void writePerModuleFunctionSummaryRecord( SmallVector &NameVals, GlobalValueSummary *Summary, @@ -1057,8 +1064,12 @@ TypeVals.push_back(TET->getNumTypeParameters()); for (Type *InnerTy : TET->type_params()) TypeVals.push_back(VE.getTypeID(InnerTy)); + TypeVals.push_back(TET->getNumIntParameters()); for (unsigned IntParam : TET->int_params()) TypeVals.push_back(IntParam); + + auto Fields = serializeTargetTypeInfo(TET, /*UseSchema=*/false); + encodeStructuredData(TypeVals, Fields); break; } case Type::TypedPointerTyID: @@ -3967,6 +3978,39 @@ } } +void ModuleBitcodeWriterBase::encodeSymbol(SmallVectorImpl &Vals, + sdata::Symbol S) { + StringRef Str = S.getAsString(); + Vals.push_back(StrtabBuilder.add(Str)); + Vals.push_back(Str.size()); +} + +void ModuleBitcodeWriterBase::encodeStructuredData( + SmallVectorImpl &Vals, + ArrayRef> Fields) { + Vals.push_back(Fields.size()); + for (const auto &Field : Fields) { + encodeSymbol(Vals, Field.first); + + if (Field.second.isAPInt()) { + const APInt &V = Field.second.getAPInt(); + assert(V.getBitWidth() <= 64 && "large ints are currently unsupported"); + Vals.push_back(bitc::SDATA_INT); + Vals.push_back(V.getZExtValue()); + } else if (Field.second.isType()) { + Type *T = Field.second.getType(); + Vals.push_back(bitc::SDATA_TYPE); + Vals.push_back(T ? 1 + VE.getTypeID(T) : 0); + } else if (Field.second.isSymbol()) { + sdata::Symbol S = Field.second.getSymbol(); + Vals.push_back(bitc::SDATA_SYMBOL); + encodeSymbol(Vals, S); + } else { + llvm_unreachable("sdata value type not implemented"); + } + } +} + // Helper to emit a single function summary record. 
void ModuleBitcodeWriterBase::writePerModuleFunctionSummaryRecord( SmallVector &NameVals, GlobalValueSummary *Summary, diff --git a/llvm/lib/IR/AsmWriter.cpp b/llvm/lib/IR/AsmWriter.cpp --- a/llvm/lib/IR/AsmWriter.cpp +++ b/llvm/lib/IR/AsmWriter.cpp @@ -58,6 +58,7 @@ #include "llvm/IR/ModuleSlotTracker.h" #include "llvm/IR/ModuleSummaryIndex.h" #include "llvm/IR/Operator.h" +#include "llvm/IR/TargetExtType.h" #include "llvm/IR/Type.h" #include "llvm/IR/TypeFinder.h" #include "llvm/IR/TypedPointerType.h" @@ -506,7 +507,7 @@ bool TypePrinting::empty() { incorporateTypes(); - return TF.structs_empty() && Type2Number.empty(); + return TF.structs_empty() && Type2Number.empty() && TF.target_exts_empty(); } void TypePrinting::incorporateTypes() { @@ -3766,6 +3767,18 @@ TypePrinter.printStructBody(NamedType, Out); Out << '\n'; } + + for (TargetExtType *TTy : TF.target_exts()) { + auto Fields = serializeTargetTypeInfo(TTy, /*UseSchema=*/false); + if (Fields.empty()) + continue; + + Out << "type "; + TypePrinter.print(TTy, Out); + Out << ' '; + + printStructuredData(Fields); + } } /// printFunction - Print all aspects of a function. 
diff --git a/llvm/lib/IR/LLVMContext.cpp b/llvm/lib/IR/LLVMContext.cpp
--- a/llvm/lib/IR/LLVMContext.cpp
+++ b/llvm/lib/IR/LLVMContext.cpp
@@ -20,6 +20,7 @@
 #include "llvm/IR/DiagnosticInfo.h"
 #include "llvm/IR/DiagnosticPrinter.h"
 #include "llvm/IR/LLVMRemarkStreamer.h"
+#include "llvm/IR/TargetExtType.h"
 #include "llvm/Remarks/RemarkStreamer.h"
 #include "llvm/Support/Casting.h"
 #include "llvm/Support/ErrorHandling.h"
@@ -103,6 +104,35 @@
   assert(SystemSSID == SyncScope::System &&
          "system synchronization scope ID drifted!");
   (void)SystemSSID;
+
+  TargetTypeInfoDeserialize::registerSymbols();
+
+  static const auto SpirvTypes =
+      TargetExtTypeClass("spirv.", true)
+          .setGetLayoutType([](TargetExtType *T) -> Type * {
+            return Type::getInt8PtrTy(T->getContext(), 0);
+          })
+          .setGetProperties([](TargetExtType *T) -> uint64_t {
+            return TargetExtType::HasZeroInit | TargetExtType::CanBeGlobal;
+          });
+  registerTargetExtTypeClass(&SpirvTypes);
+
+  // aarch64.svcount takes no parameters; the verifier below enforces that.
+  static const auto Aarch64SVCount =
+      TargetExtTypeClass("aarch64.svcount")
+          .setGetLayoutType([](TargetExtType *T) -> Type * {
+            return ScalableVectorType::get(Type::getInt1Ty(T->getContext()),
+                                           16);
+          })
+          .setVerifier([](TargetExtType *T, raw_ostream &Errs) -> bool {
+            // Covered by llvm/test/Verifier/target-type.ll.
+            if (T->getNumTypeParameters() || T->getNumIntParameters()) {
+              Errs << "aarch64.svcount cannot have parameters\n";
+              return false;
+            }
+            return true;
+          });
+  registerTargetExtTypeClass(&Aarch64SVCount);
 }
 
 LLVMContext::~LLVMContext() { delete pImpl; }
@@ -375,3 +405,33 @@
 bool LLVMContext::supportsTypedPointers() const {
   return !pImpl->getOpaquePointers();
 }
+
+// Registers a target extension type class with this context. This is only
+// allowed before the first lookup. Exactly the prefix classes end in '.'.
+void LLVMContext::registerTargetExtTypeClass(
+    const TargetExtTypeClass *TypeClass) {
+  assert(!pImpl->TargetExtTypeClassesFrozen &&
+         "cannot register target type classes after the first lookup");
+  assert(!TypeClass->Name.empty() && "target type class must have a name");
+  assert(TypeClass->NameIsPrefix ==
+             StringRef(TypeClass->Name).ends_with(".") &&
+         "exactly the prefix classes must have a name ending in '.'");
+
+  pImpl->TargetExtTypeClasses.push_back(TypeClass);
+}
+
+// Returns the first registered class whose name matches Name, either as a
+// prefix or exactly, or nullptr. Any lookup freezes the set of classes.
+const TargetExtTypeClass *
+LLVMContext::findTargetExtTypeClass(StringRef Name) const {
+  pImpl->TargetExtTypeClassesFrozen = true;
+
+  for (const TargetExtTypeClass *Class : pImpl->TargetExtTypeClasses) {
+    if ((Class->NameIsPrefix && Name.starts_with(Class->Name)) ||
+        (!Class->NameIsPrefix && Name == Class->Name)) {
+      return Class;
+    }
+  }
+
+  return nullptr;
+}
diff --git a/llvm/lib/IR/LLVMContextImpl.h b/llvm/lib/IR/LLVMContextImpl.h
--- a/llvm/lib/IR/LLVMContextImpl.h
+++ b/llvm/lib/IR/LLVMContextImpl.h
@@ -1611,6 +1611,11 @@
   /// clients which do use GC.
   DenseMap<const Function *, std::string> GCNames;
 
+  /// Target extension type classes registered with this context. The list is
+  /// frozen by the first lookup; no class may be registered after that.
+  SmallVector<const TargetExtTypeClass *> TargetExtTypeClasses;
+  bool TargetExtTypeClassesFrozen = false;
+
   /// Flag to indicate if Value (other than GlobalValue) retains their name or
   /// not.
   bool DiscardValueNames = false;
diff --git a/llvm/lib/IR/StructuredData.cpp b/llvm/lib/IR/StructuredData.cpp
--- a/llvm/lib/IR/StructuredData.cpp
+++ b/llvm/lib/IR/StructuredData.cpp
@@ -56,11 +56,15 @@
 sdata::RegisterSymbol::RegisterSymbol(StringRef Str) {
   SymbolTable &ST = SymbolTable::instance();
   sys::ScopedWriter Lock(ST.Mutex);
-  if (!ST.NameToId.count(Str)) {
+  auto I = ST.NameToId.find(Str);
+  if (I == ST.NameToId.end()) {
     StringRef Saved = ST.Saver.save(Str);
     ST.IdToName.push_back(Saved);
-    ST.NameToId.try_emplace(Saved, ST.IdToName.size());
+    I = ST.NameToId.try_emplace(Saved, ST.IdToName.size()).first;
   }
+
+  S.Id = I->second;
+  S.String = ST.IdToName[S.Id - 1];
 }
 
 SymbolTableLockGuard::SymbolTableLockGuard() {
diff --git a/llvm/lib/IR/Type.cpp b/llvm/lib/IR/Type.cpp
--- a/llvm/lib/IR/Type.cpp
+++ b/llvm/lib/IR/Type.cpp
@@ -20,6 +20,7 @@
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/DerivedTypes.h"
 #include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/TargetExtType.h"
 #include "llvm/IR/Value.h"
 #include "llvm/Support/Casting.h"
 #include "llvm/Support/TypeSize.h"
@@ -809,6 +810,131 @@
 //                       TargetExtType Implementation
 //===----------------------------------------------------------------------===//
 
+namespace {
+
+// Structured data keys of the serialized target type info.
+struct TargetTypeInfoKeys {
+  sdata::RegisterSymbol Layout{"layout"};
+  sdata::RegisterSymbol HasZeroInit{"hasZeroInit"};
+  sdata::RegisterSymbol CanBeGlobal{"canBeGlobal"};
+
+  static const TargetTypeInfoKeys &get() {
+    static const TargetTypeInfoKeys TTIK;
+    return TTIK;
+  }
+};
+
+} // anonymous namespace
+
+TargetTypeInfoDeserialize::TargetTypeInfoDeserialize(LLVMContext &Ctx,
+                                                     StringRef Name,
+                                                     ArrayRef<Type *> Types,
+                                                     ArrayRef<unsigned> Ints)
+    : Ctx(Ctx), Name(Name), Types(Types), Ints(Ints) {}
+
+// Constructs the key symbols above on first use.
+void TargetTypeInfoDeserialize::registerSymbols() {
+  (void)TargetTypeInfoKeys::get();
+}
+
+// Records one (key, value) pair of serialized target type info. Fails on an
+// unknown key or on a value of the wrong kind.
+Error TargetTypeInfoDeserialize::parseField(sdata::Symbol K, sdata::Value V) {
+  const auto &TTIK = TargetTypeInfoKeys::get();
+
+  if (K == TTIK.Layout) {
+    if (!V.isType())
+      return sdata::makeDeserializeError("expected a type");
+    LayoutType = V.getType();
+    return Error::success();
+  }
+
+  struct {
+    const sdata::RegisterSymbol &Symbol;
+    uint64_t Value;
+  } BoolFields[] = {
+      {TTIK.CanBeGlobal, TargetExtType::CanBeGlobal},
+      {TTIK.HasZeroInit, TargetExtType::HasZeroInit},
+  };
+
+  for (const auto &Field : BoolFields) {
+    if (K == Field.Symbol) {
+      if (!V.isBool())
+        return sdata::makeDeserializeError("expected a boolean");
+      // Set or clear only this field's bit; other properties are untouched.
+      if (V.getBool())
+        Properties |= Field.Value;
+      else
+        Properties &= ~Field.Value;
+      return Error::success();
+    }
+  }
+
+  return sdata::makeDeserializeError(
+      "expected 'layout', 'canBeGlobal', or 'hasZeroInit'");
+}
+
+// Looks up or creates the target type. A newly created type is initialized
+// from its registered class if there is one and from the parsed fields
+// otherwise. Either way, the type must agree with the parsed fields.
+Expected<TargetExtType *> TargetTypeInfoDeserialize::finish() {
+  auto GetResult = TargetExtType::getInternal(Ctx, Name, Types, Ints);
+  TargetExtType *T = GetResult.first;
+
+  if (GetResult.second) {
+    auto *Class = Ctx.findTargetExtTypeClass(Name);
+    if (Class) {
+      if (Class->Verifier) {
+        std::string ErrStr;
+        raw_string_ostream Errs(ErrStr);
+        if (!Class->Verifier(T, Errs)) {
+          // Do not leave the rejected, uninitialized type cached in Ctx.
+          Ctx.pImpl->TargetExtTypes.erase(T);
+          return sdata::makeDeserializeError(Twine("invalid target type:\n") +
+                                             ErrStr);
+        }
+      }
+      GetResult.first->initFromClass(Class);
+    } else {
+      T->LayoutType = LayoutType;
+      T->Properties = Properties;
+    }
+  }
+
+  if (T->LayoutType != LayoutType)
+    return sdata::makeDeserializeError("target type has wrong layout type");
+  if (T->Properties != Properties)
+    return sdata::makeDeserializeError("target type has wrong properties");
+
+  return T;
+}
+
+// Returns the (key, value) fields describing Ty. With UseSchema, every known
+// field is emitted; otherwise only a non-null layout and the set properties.
+SmallVector<std::pair<sdata::Symbol, sdata::Value>>
+llvm::serializeTargetTypeInfo(TargetExtType *Ty, bool UseSchema) {
+  const auto &TTIK = TargetTypeInfoKeys::get();
+  SmallVector<std::pair<sdata::Symbol, sdata::Value>> Fields;
+
+  if (Ty->getLayoutType() || UseSchema)
+    Fields.emplace_back(TTIK.Layout, Ty->getLayoutType());
+
+  struct {
+    const sdata::RegisterSymbol &Symbol;
+    TargetExtType::Property Value;
+  } BoolFields[] = {
+      {TTIK.CanBeGlobal, TargetExtType::CanBeGlobal},
+      {TTIK.HasZeroInit, TargetExtType::HasZeroInit},
+  };
+
+  for (const auto &Field : BoolFields) {
+    if (UseSchema || Ty->hasProperty(Field.Value))
+      Fields.emplace_back(Field.Symbol, Ty->hasProperty(Field.Value));
+  }
+
+  return Fields;
+}
+
 TargetExtType::TargetExtType(LLVMContext &C, StringRef Name,
                              ArrayRef<Type *> Types, ArrayRef<unsigned> Ints)
     : Type(C, TargetExtTyID), Name(C.pImpl->Saver.save(Name)) {
@@ -830,60 +956,85 @@
 TargetExtType *TargetExtType::get(LLVMContext &C, StringRef Name,
                                   ArrayRef<Type *> Types,
                                   ArrayRef<unsigned> Ints) {
-  const TargetExtTypeKeyInfo::KeyTy Key(Name, Types, Ints);
-  TargetExtType *TT;
-  // Since we only want to allocate a fresh target type in case none is found
-  // and we don't want to perform two lookups (one for checking if existent and
-  // one for inserting the newly allocated one), here we instead lookup based on
-  // Key and update the reference to the target type in-place to a newly
-  // allocated one if not found.
-  auto Insertion = C.pImpl->TargetExtTypes.insert_as(nullptr, Key);
-  if (Insertion.second) {
-    // The target type was not found. Allocate one and update TargetExtTypes
-    // in-place.
-    TT = (TargetExtType *)C.pImpl->Alloc.Allocate(
-        sizeof(TargetExtType) + sizeof(Type *) * Types.size() +
-            sizeof(unsigned) * Ints.size(),
-        alignof(TargetExtType));
-    new (TT) TargetExtType(C, Name, Types, Ints);
-    *Insertion.first = TT;
-  } else {
-    // The target type was found. Just return it.
-    TT = *Insertion.first;
+  auto Result = getInternal(C, Name, Types, Ints);
+  if (Result.second) {
+    const auto *Class = C.findTargetExtTypeClass(Name);
+    if (Class)
+      Result.first->initFromClass(Class);
   }
-  return TT;
+  return Result.first;
+}
+
+// Like get(), but runs the class verifier on a newly created type first and
+// returns nullptr if the verifier, which reports to Errs, rejects it.
+TargetExtType *TargetExtType::getChecked(LLVMContext &C, StringRef Name,
+                                         ArrayRef<Type *> Types,
+                                         ArrayRef<unsigned> Ints,
+                                         raw_ostream &Errs) {
+  auto Result = getInternal(C, Name, Types, Ints);
+  if (Result.second) {
+    const auto *Class = C.findTargetExtTypeClass(Name);
+    if (Class) {
+      if (Class->Verifier) {
+        if (!Class->Verifier(Result.first, Errs)) {
+          // Do not leave the rejected, uninitialized type cached in C.
+          C.pImpl->TargetExtTypes.erase(Result.first);
+          return nullptr;
+        }
+      }
+      Result.first->initFromClass(Class);
+    }
+  }
+  return Result.first;
 }
 
-namespace {
-struct TargetTypeInfo {
-  Type *LayoutType;
-  uint64_t Properties;
-
-  template <typename... ArgTys>
-  TargetTypeInfo(Type *LayoutType, ArgTys... Properties)
-      : LayoutType(LayoutType), Properties((0 | ... | Properties)) {}
-};
-} // anonymous namespace
-
-static TargetTypeInfo getTargetTypeInfo(const TargetExtType *Ty) {
-  LLVMContext &C = Ty->getContext();
-  StringRef Name = Ty->getName();
-  if (Name.startswith("spirv."))
-    return TargetTypeInfo(Type::getInt8PtrTy(C, 0), TargetExtType::HasZeroInit,
-                          TargetExtType::CanBeGlobal);
-
-  // Opaque types in the AArch64 name space.
-  if (Name == "aarch64.svcount")
-    return TargetTypeInfo(ScalableVectorType::get(Type::getInt1Ty(C), 16));
+// Variant of get() for callers that already know the type's class.
+TargetExtType *TargetExtType::get(LLVMContext &C,
+                                  const TargetExtTypeClass *Class,
+                                  StringRef Name, ArrayRef<Type *> Types,
+                                  ArrayRef<unsigned> Ints) {
+  assert(Name.starts_with(Class->Name));
+  assert(Class->NameIsPrefix || Class->Name.size() == Name.size());
 
-  return TargetTypeInfo(Type::getVoidTy(C));
+  auto Result = getInternal(C, Name, Types, Ints);
+  if (Result.second)
+    Result.first->initFromClass(Class);
+  return Result.first;
 }
 
-Type *TargetExtType::getLayoutType() const {
-  return getTargetTypeInfo(this).LayoutType;
+// Caches the layout type and properties computed by the class callbacks.
+void TargetExtType::initFromClass(const TargetExtTypeClass *Class) {
+  assert(!Class->Verifier || Class->Verifier(this, llvm::errs()));
+  if (Class->GetLayoutType)
+    LayoutType = Class->GetLayoutType(this);
+  if (Class->GetProperties)
+    Properties = Class->GetProperties(this);
 }
 
-bool TargetExtType::hasProperty(Property Prop) const {
-  uint64_t Properties = getTargetTypeInfo(this).Properties;
-  return (Properties & Prop) == Prop;
+// Returns the uniqued target type for the given key, creating it if needed,
+// together with a flag that is true iff the type was newly created.
+std::pair<TargetExtType *, bool>
+TargetExtType::getInternal(LLVMContext &C, StringRef Name,
+                           ArrayRef<Type *> Types, ArrayRef<unsigned> Ints) {
+  assert(!Name.ends_with("."));
+  const TargetExtTypeKeyInfo::KeyTy Key(Name, Types, Ints);
+
+  // Since we only want to allocate a fresh target type in case none is found
+  // and we don't want to perform two lookups (one for checking if existent and
+  // one for inserting the newly allocated one), here we instead lookup based on
+  // Key and update the reference to the target type in-place to a newly
+  // allocated one if not found.
+  auto Insertion = C.pImpl->TargetExtTypes.insert_as(nullptr, Key);
+  if (!Insertion.second)
+    return {*Insertion.first, false};
+
+  // The target type was not found. Allocate one and update TargetExtTypes
+  // in-place.
+  auto *TT = (TargetExtType *)C.pImpl->Alloc.Allocate(
+      sizeof(TargetExtType) + sizeof(Type *) * Types.size() +
+          sizeof(unsigned) * Ints.size(),
+      alignof(TargetExtType));
+  new (TT) TargetExtType(C, Name, Types, Ints);
+  *Insertion.first = TT;
+  return {TT, true};
 }
diff --git a/llvm/test/Assembler/target-type-custom.ll b/llvm/test/Assembler/target-type-custom.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Assembler/target-type-custom.ll
@@ -0,0 +1,38 @@
+; RUN: llvm-as < %s | llvm-dis | FileCheck %s
+
+; CHECK: type target("b", 1) {
+; CHECK:   canBeGlobal: true,
+; CHECK:   hasZeroInit: true,
+; CHECK: }
+; CHECK: type target("a") {
+; CHECK:   layout: type i32,
+; CHECK: }
+
+type target("a") {
+  layout: type i32,
+}
+type target("b", 0) {
+  hasZeroInit: false,
+}
+type target("b", 1) {
+  hasZeroInit: true,
+  canBeGlobal: true,
+}
+type target("b", 2) {}
+
+; CHECK: @global = external global target("b", 1)
+@global = external global target("b", 1)
+
+; CHECK: declare void @callee(target("b", 1))
+declare void @callee(target("b", 1))
+
+; CHECK: define void @test1() {
+; CHECK:   %p = alloca target("a")
+; CHECK:   call void @callee(target("b", 1) zeroinitializer)
+; CHECK:   ret void
+; CHECK: }
+define void @test1() {
+  %p = alloca target("a")
+  call void @callee(target("b", 1) zeroinitializer)
+  ret void
+}
diff --git a/llvm/test/Bitcode/compatibility.ll b/llvm/test/Bitcode/compatibility.ll
--- a/llvm/test/Bitcode/compatibility.ll
+++ b/llvm/test/Bitcode/compatibility.ll
@@ -18,6 +18,17 @@
 module asm "beep boop"
 ; CHECK: module asm "beep boop"
 
+type target("foo", i32, 5) {
+  layout: type i32,
+  canBeGlobal: true,
+  hasZeroInit: true,
+}
+; CHECK: type target("foo", i32, 5) {
+; CHECK:   layout: type i32,
+; CHECK:   canBeGlobal: true,
+; CHECK:   hasZeroInit: true,
+; CHECK: }
+
 ;; Comdats
 $comdat.any = comdat any
 ; CHECK: $comdat.any = comdat any
@@ -1123,6 +1134,9 @@
 declare void @llvm.token(token)
 ; CHECK: declare void @llvm.token(token)
 
+declare target("foo", i32, 5) @target_ext_type(target("bar"))
+; CHECK: declare target("foo", i32, 5) @target_ext_type(target("bar"))
+
 ;; Inline Assembler Expressions
 define void @inlineasm(i32 %arg) {
   call i32 asm "bswap $0", "=r,r"(i32 %arg)
diff --git a/llvm/test/Verifier/target-type.ll b/llvm/test/Verifier/target-type.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Verifier/target-type.ll
@@ -0,0 +1,15 @@
+; RUN: split-file %s %t
+; RUN: not llvm-as -disable-output %t/aarch64-svcount-type-parameter.ll 2>&1 | FileCheck --check-prefixes=CHECK %t/aarch64-svcount-type-parameter.ll
+; RUN: not llvm-as -disable-output %t/aarch64-svcount-int-parameter.ll 2>&1 | FileCheck --check-prefixes=CHECK %t/aarch64-svcount-int-parameter.ll
+
+;--- aarch64-svcount-type-parameter.ll
+
+; CHECK: [[@LINE+2]]:20: error: target type failed validation:
+; CHECK: aarch64.svcount cannot have parameters
+declare void @test(target("aarch64.svcount", i32))
+
+;--- aarch64-svcount-int-parameter.ll
+
+; CHECK: [[@LINE+2]]:20: error: target type failed validation:
+; CHECK: aarch64.svcount cannot have parameters
+declare void @test(target("aarch64.svcount", 5))