diff --git a/llvm/include/llvm/Bitcode/BitcodeReader.h b/llvm/include/llvm/Bitcode/BitcodeReader.h --- a/llvm/include/llvm/Bitcode/BitcodeReader.h +++ b/llvm/include/llvm/Bitcode/BitcodeReader.h @@ -32,11 +32,26 @@ class LLVMContext; class Module; class MemoryBuffer; +class Metadata; class ModuleSummaryIndex; +class Type; +class Value; typedef llvm::function_ref(StringRef)> DataLayoutCallbackTy; +typedef llvm::function_ref GetTypeByIDTy; + +typedef llvm::function_ref GetContainedTypeIDTy; + +typedef llvm::function_ref + ValueTypeCallbackTy; + +typedef llvm::function_ref + MDTypeCallbackTy; + // These functions are for converting Expected/Error values to // ErrorOr/std::error_code for compatibility with legacy clients. FIXME: // Remove these functions once no longer needed by the C and libLTO APIs. @@ -83,10 +98,11 @@ friend Expected getBitcodeFileContents(MemoryBufferRef Buffer); - Expected> - getModuleImpl(LLVMContext &Context, bool MaterializeAll, - bool ShouldLazyLoadMetadata, bool IsImporting, - DataLayoutCallbackTy DataLayoutCallback); + Expected> getModuleImpl( + LLVMContext &Context, bool MaterializeAll, bool ShouldLazyLoadMetadata, + bool IsImporting, DataLayoutCallbackTy DataLayoutCallback, + std::optional ValueTypeCallback = std::nullopt, + std::optional MDTypeCallback = std::nullopt); public: StringRef getBuffer() const { @@ -107,8 +123,11 @@ /// Read the entire bitcode module and return it. Expected> parseModule( - LLVMContext &Context, DataLayoutCallbackTy DataLayoutCallback = - [](StringRef) { return std::nullopt; }); + LLVMContext &Context, + DataLayoutCallbackTy DataLayoutCallback = + [](StringRef) { return std::nullopt; }, + std::optional ValueTypeCallback = std::nullopt, + std::optional MDTypeCallback = std::nullopt); /// Returns information about the module to be used for LTO: whether to /// compile with ThinLTO, and whether it has a summary. 
@@ -173,11 +192,20 @@ Expected getBitcodeProducerString(MemoryBufferRef Buffer); /// Read the specified bitcode file, returning the module. + /// The ValueTypeCallback is called for every function definition or + /// declaration and allows accessing the type information, including the + /// types behind pointers. This can be useful when the opaque pointer + /// upgrade cleans all type information behind pointers. + /// The second argument to ValueTypeCallback is the type ID of the + /// function; the two passed functions can be used to extract type + /// information. + /// The MDTypeCallback is called for every value in metadata. Expected> parseBitcodeFile( MemoryBufferRef Buffer, LLVMContext &Context, - DataLayoutCallbackTy DataLayoutCallback = [](StringRef) { - return std::nullopt; - }); + DataLayoutCallbackTy DataLayoutCallback = + [](StringRef) { return std::nullopt; }, + std::optional ValueTypeCallback = std::nullopt, + std::optional MDTypeCallback = std::nullopt); /// Returns LTO information for the specified bitcode file. Expected getBitcodeLTOInfo(MemoryBufferRef Buffer); diff --git a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp --- a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp +++ b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp @@ -674,6 +674,8 @@ std::vector BundleTags; SmallVector SSIDs; + std::optional ValueTypeCallback; + public: BitcodeReader(BitstreamCursor Stream, StringRef Strtab, StringRef ProducerIdentification, LLVMContext &Context); @@ -688,7 +690,10 @@ /// \returns true if an error occurred.
Error parseBitcodeInto( Module *M, bool ShouldLazyLoadMetadata, bool IsImporting, - DataLayoutCallbackTy DataLayoutCallback); + DataLayoutCallbackTy DataLayoutCallback = + [](StringRef) { return std::nullopt; }, + std::optional ValueTypeCallback = std::nullopt, + std::optional MDTypeCallback = std::nullopt); static uint64_t decodeSignRotatedValue(uint64_t V); @@ -709,6 +714,7 @@ unsigned getContainedTypeID(unsigned ID, unsigned Idx = 0); unsigned getVirtualTypeID(Type *Ty, ArrayRef ContainedTypeIDs = {}); + void callValueTypeCallback(Value *F, unsigned TypeID); Expected materializeValue(unsigned ValID, BasicBlock *InsertBB); Expected getValueForInitializer(unsigned ID); @@ -821,9 +827,9 @@ Error parseAttrKind(uint64_t Code, Attribute::AttrKind *Kind); Error parseModule( uint64_t ResumeBit, bool ShouldLazyLoadMetadata = false, - DataLayoutCallbackTy DataLayoutCallback = [](StringRef) { - return std::nullopt; - }); + DataLayoutCallbackTy DataLayoutCallback = + [](StringRef) { return std::nullopt; }, + std::optional ValueTypeCallback = std::nullopt); Error parseComdatRecord(ArrayRef Record); Error parseGlobalVarRecord(ArrayRef Record); @@ -3919,6 +3925,14 @@ return Error::success(); } +void BitcodeReader::callValueTypeCallback(Value *F, unsigned TypeID) { + if (ValueTypeCallback) { + (*ValueTypeCallback)( + F, TypeID, [this](unsigned I) { return getTypeByID(I); }, + [this](unsigned I, unsigned J) { return getContainedTypeID(I, J); }); + } +} + Error BitcodeReader::parseFunctionRecord(ArrayRef Record) { // v1: [type, callingconv, isproto, linkage, paramattr, alignment, section, // visibility, gc, unnamed_addr, prologuedata, dllstorageclass, comdat, @@ -3963,6 +3977,7 @@ uint64_t RawLinkage = Record[3]; Func->setLinkage(getDecodedLinkage(RawLinkage)); Func->setAttributes(getAttributes(Record[4])); + callValueTypeCallback(Func, FTyID); // Upgrade any old-style byval or sret without a type by propagating the // argument's pointee type. 
There should be no opaque pointers where the byval @@ -4178,9 +4193,11 @@ return Error::success(); } -Error BitcodeReader::parseModule(uint64_t ResumeBit, - bool ShouldLazyLoadMetadata, - DataLayoutCallbackTy DataLayoutCallback) { +Error BitcodeReader::parseModule( + uint64_t ResumeBit, bool ShouldLazyLoadMetadata, + DataLayoutCallbackTy DataLayoutCallback, + std::optional ValueTypeCallback) { + this->ValueTypeCallback = std::move(ValueTypeCallback); if (ResumeBit) { if (Error JumpFailed = Stream.JumpToBit(ResumeBit)) return JumpFailed; @@ -4465,15 +4482,22 @@ } Record.clear(); } + this->ValueTypeCallback = std::nullopt; } -Error BitcodeReader::parseBitcodeInto(Module *M, bool ShouldLazyLoadMetadata, - bool IsImporting, - DataLayoutCallbackTy DataLayoutCallback) { +Error BitcodeReader::parseBitcodeInto( + Module *M, bool ShouldLazyLoadMetadata, bool IsImporting, + DataLayoutCallbackTy DataLayoutCallback, + std::optional ValueTypeCallback, + std::optional MDTypeCallback) { TheModule = M; - MDLoader = MetadataLoader(Stream, *M, ValueList, IsImporting, - [&](unsigned ID) { return getTypeByID(ID); }); - return parseModule(0, ShouldLazyLoadMetadata, DataLayoutCallback); + MDLoader = MetadataLoader( + Stream, *M, ValueList, IsImporting, + [&](unsigned ID) { return getTypeByID(ID); }, + [&](unsigned I, unsigned J) { return getContainedTypeID(I, J); }, + MDTypeCallback); + return parseModule(0, ShouldLazyLoadMetadata, DataLayoutCallback, + std::move(ValueTypeCallback)); } Error BitcodeReader::typeCheckLoadStoreInst(Type *ValType, Type *PtrType) { @@ -7899,10 +7923,11 @@ /// /// \param[in] MaterializeAll Set to \c true if we should materialize /// everything. 
-Expected> -BitcodeModule::getModuleImpl(LLVMContext &Context, bool MaterializeAll, - bool ShouldLazyLoadMetadata, bool IsImporting, - DataLayoutCallbackTy DataLayoutCallback) { +Expected> BitcodeModule::getModuleImpl( + LLVMContext &Context, bool MaterializeAll, bool ShouldLazyLoadMetadata, + bool IsImporting, DataLayoutCallbackTy DataLayoutCallback, + std::optional ValueTypeCallback, + std::optional MDTypeCallback) { BitstreamCursor Stream(Buffer); std::string ProducerIdentification; @@ -7924,8 +7949,9 @@ M->setMaterializer(R); // Delay parsing Metadata if ShouldLazyLoadMetadata is true. - if (Error Err = R->parseBitcodeInto(M.get(), ShouldLazyLoadMetadata, - IsImporting, DataLayoutCallback)) + if (Error Err = R->parseBitcodeInto( + M.get(), ShouldLazyLoadMetadata, IsImporting, DataLayoutCallback, + std::move(ValueTypeCallback), std::move(MDTypeCallback))) return std::move(Err); if (MaterializeAll) { @@ -8110,20 +8136,27 @@ Expected> BitcodeModule::parseModule(LLVMContext &Context, - DataLayoutCallbackTy DataLayoutCallback) { - return getModuleImpl(Context, true, false, false, DataLayoutCallback); + DataLayoutCallbackTy DataLayoutCallback, + std::optional ValueTypeCallback, + std::optional MDTypeCallback) { + return getModuleImpl(Context, true, false, false, DataLayoutCallback, + std::move(ValueTypeCallback), std::move(MDTypeCallback)); // TODO: Restore the use-lists to the in-memory state when the bitcode was // written. We must defer until the Module has been fully materialized. 
} Expected> llvm::parseBitcodeFile(MemoryBufferRef Buffer, LLVMContext &Context, - DataLayoutCallbackTy DataLayoutCallback) { + DataLayoutCallbackTy DataLayoutCallback, + std::optional ValueTypeCallback, + std::optional MDTypeCallback) { Expected BM = getSingleModule(Buffer); if (!BM) return BM.takeError(); - return BM->parseModule(Context, DataLayoutCallback); + return BM->parseModule(Context, DataLayoutCallback, + std::move(ValueTypeCallback), + std::move(MDTypeCallback)); } Expected llvm::getBitcodeTargetTriple(MemoryBufferRef Buffer) { diff --git a/llvm/lib/Bitcode/Reader/MetadataLoader.h b/llvm/lib/Bitcode/Reader/MetadataLoader.h --- a/llvm/lib/Bitcode/Reader/MetadataLoader.h +++ b/llvm/lib/Bitcode/Reader/MetadataLoader.h @@ -29,6 +29,14 @@ class Type; template class ArrayRef; +typedef llvm::function_ref GetTypeByIDTy; + +typedef llvm::function_ref GetContainedTypeIDTy; + +typedef llvm::function_ref + MDTypeCallbackTy; + /// Helper class that handles loading Metadatas and keeping them available. class MetadataLoader { class MetadataLoaderImpl; @@ -39,7 +47,9 @@ ~MetadataLoader(); MetadataLoader(BitstreamCursor &Stream, Module &TheModule, BitcodeReaderValueList &ValueList, bool IsImporting, - std::function getTypeByID); + GetTypeByIDTy getTypeByID, + GetContainedTypeIDTy getContainedTypeID, + std::optional MDTypeCallback); MetadataLoader &operator=(MetadataLoader &&); MetadataLoader(MetadataLoader &&); diff --git a/llvm/lib/Bitcode/Reader/MetadataLoader.cpp b/llvm/lib/Bitcode/Reader/MetadataLoader.cpp --- a/llvm/lib/Bitcode/Reader/MetadataLoader.cpp +++ b/llvm/lib/Bitcode/Reader/MetadataLoader.cpp @@ -406,7 +406,9 @@ BitstreamCursor &Stream; LLVMContext &Context; Module &TheModule; - std::function getTypeByID; + GetTypeByIDTy getTypeByID; + GetContainedTypeIDTy getContainedTypeID; + std::optional MDTypeCallback; /// Cursor associated with the lazy-loading of Metadata. 
This is the easy way /// to keep around the right "context" (Abbrev list) to be able to jump in @@ -627,15 +629,20 @@ upgradeCUVariables(); } + void callMDTypeCallback(Metadata **Val, unsigned TypeID); + public: MetadataLoaderImpl(BitstreamCursor &Stream, Module &TheModule, BitcodeReaderValueList &ValueList, - std::function getTypeByID, + GetTypeByIDTy getTypeByID, + GetContainedTypeIDTy getContainedTypeID, + std::optional MDTypeCallback, bool IsImporting) : MetadataList(TheModule.getContext(), Stream.SizeInBytes()), ValueList(ValueList), Stream(Stream), Context(TheModule.getContext()), TheModule(TheModule), getTypeByID(std::move(getTypeByID)), - IsImporting(IsImporting) {} + getContainedTypeID(std::move(getContainedTypeID)), + MDTypeCallback(std::move(MDTypeCallback)), IsImporting(IsImporting) {} Error parseMetadata(bool ModuleLevel); @@ -952,6 +959,13 @@ } } +void MetadataLoader::MetadataLoaderImpl::callMDTypeCallback(Metadata **Val, + unsigned TypeID) { + if (MDTypeCallback) { + (*MDTypeCallback)(Val, TypeID, getTypeByID, getContainedTypeID); + } +} + /// Parse a METADATA_BLOCK. If ModuleLevel is true then we are parsing /// module level metadata. 
Error MetadataLoader::MetadataLoaderImpl::parseMetadata(bool ModuleLevel) { @@ -1255,9 +1269,10 @@ /*ConstExprInsertBB*/ nullptr); if (!V) return error("Invalid value reference from old metadata"); - auto *MD = ValueAsMetadata::get(V); + Metadata *MD = ValueAsMetadata::get(V); assert(isa(MD) && "Expected non-function-local metadata"); + callMDTypeCallback(&MD, TyID); Elts.push_back(MD); } else Elts.push_back(nullptr); @@ -1280,7 +1295,9 @@ if (!V) return error("Invalid value reference from metadata"); - MetadataList.assignValue(ValueAsMetadata::get(V), NextMetadataNo); + Metadata *MD = ValueAsMetadata::get(V); + callMDTypeCallback(&MD, TyID); + MetadataList.assignValue(MD, NextMetadataNo); NextMetadataNo++; break; } @@ -2358,10 +2375,13 @@ MetadataLoader::~MetadataLoader() = default; MetadataLoader::MetadataLoader(BitstreamCursor &Stream, Module &TheModule, BitcodeReaderValueList &ValueList, - bool IsImporting, - std::function getTypeByID) + bool IsImporting, GetTypeByIDTy getTypeByID, + GetContainedTypeIDTy getContainedTypeID, + std::optional MDTypeCallback) : Pimpl(std::make_unique( - Stream, TheModule, ValueList, std::move(getTypeByID), IsImporting)) {} + Stream, TheModule, ValueList, std::move(getTypeByID), + std::move(getContainedTypeID), std::move(MDTypeCallback), + IsImporting)) {} Error MetadataLoader::parseMetadata(bool ModuleLevel) { return Pimpl->parseMetadata(ModuleLevel); diff --git a/llvm/unittests/Bitcode/BitReaderTest.cpp b/llvm/unittests/Bitcode/BitReaderTest.cpp --- a/llvm/unittests/Bitcode/BitReaderTest.cpp +++ b/llvm/unittests/Bitcode/BitReaderTest.cpp @@ -6,11 +6,13 @@ // //===----------------------------------------------------------------------===// +#include "llvm/ADT/MapVector.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallString.h" #include "llvm/AsmParser/Parser.h" #include "llvm/Bitcode/BitcodeReader.h" #include "llvm/Bitcode/BitcodeWriter.h" +#include "llvm/IR/Constants.h" #include "llvm/IR/InstrTypes.h" #include 
"llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" @@ -255,4 +257,204 @@ EXPECT_FALSE(verifyModule(*M, &dbgs())); } +// Helper function to convert type metadata to a string for testing +static std::string mdToString(Metadata *MD) { + std::string S; + if (auto *VMD = dyn_cast(MD)) { + if (VMD->getType()->isPointerTy()) { + S += "ptr"; + return S; + } + } + + if (auto *TMD = dyn_cast(MD)) { + S += "!{"; + for (unsigned I = 0; I < TMD->getNumOperands(); I++) { + if (I != 0) + S += ", "; + S += mdToString(TMD->getOperand(I).get()); + } + S += "}"; + } else if (auto *SMD = dyn_cast(MD)) { + S += "!'"; + S += SMD->getString(); + S += "'"; + } else if (auto *I = mdconst::dyn_extract(MD)) { + S += std::to_string(I->getZExtValue()); + } else if (auto *P = mdconst::dyn_extract(MD)) { + auto *Ty = P->getType(); + if (Ty->isIntegerTy()) { + S += "i"; + S += std::to_string(Ty->getIntegerBitWidth()); + } else if (Ty->isStructTy()) { + S += "%"; + S += Ty->getStructName(); + } else { + llvm_unreachable("unhandled poison metadata"); + } + } else { + llvm_unreachable("unhandled metadata"); + } + return S; +} + +// Recursively look into a (pointer) type and return metadata describing it. +// For primitive types it's a poison value of the type, for a pointer it's a +// metadata tuple with the addrspace and the referenced type. For a function, +// it's a tuple where the first element is the string "function", the second +// element is the return type or the string "void" and the following elements +// are the argument types.
+Metadata *getTypeMetadataEntry(unsigned TypeID, LLVMContext &Context, + GetTypeByIDTy GetTypeByID, + GetContainedTypeIDTy GetContainedTypeID) { + Type *Ty = GetTypeByID(TypeID); + if (auto *FTy = dyn_cast(Ty)) { + // Save the function signature as metadata + SmallVector SignatureMD; + SignatureMD.push_back(MDString::get(Context, "function")); + // Return type + if (FTy->getReturnType()->isVoidTy()) + SignatureMD.push_back(MDString::get(Context, "void")); + else + SignatureMD.push_back(getTypeMetadataEntry(GetContainedTypeID(TypeID, 0), + Context, GetTypeByID, + GetContainedTypeID)); + // Arguments + for (unsigned I = 0; I != FTy->getNumParams(); ++I) + SignatureMD.push_back( + getTypeMetadataEntry(GetContainedTypeID(TypeID, I + 1), Context, + GetTypeByID, GetContainedTypeID)); + + return MDTuple::get(Context, SignatureMD); + } + + if (!Ty->isPointerTy()) + return ConstantAsMetadata::get(PoisonValue::get(Ty)); + + // Return !{addrspace, pointee type} for a pointer + SmallVector MD; + MD.push_back(ConstantAsMetadata::get(ConstantInt::get( + Type::getInt32Ty(Context), Ty->getPointerAddressSpace()))); + MD.push_back(getTypeMetadataEntry(GetContainedTypeID(TypeID, 0), Context, + GetTypeByID, GetContainedTypeID)); + return MDTuple::get(Context, MD); +} + +// Test that when reading bitcode with typed pointers and upgrading them to +// opaque pointers, the type information of function signatures can be extracted +// and stored in metadata.
+TEST(BitReaderTest, AccessFunctionTypeInfo) { + SmallString<1024> Mem; + LLVMContext WriteContext; + writeModuleToBuffer( + parseAssembly( + WriteContext, + "define void @func() {\n" + " unreachable\n" + "}\n" + "declare i32 @func_header()\n" + "declare i8* @ret_ptr()\n" + "declare i8* @ret_and_arg_ptr(i32 addrspace(8)*)\n" + "declare i8 addrspace(1)* @double_ptr(i32* addrspace(2)*, i32***)\n"), + Mem); + + LLVMContext Context; + Context.setOpaquePointers(true); + + Expected> ModuleOrErr = parseBitcodeFile( + MemoryBufferRef(Mem.str(), "test"), Context, + [](StringRef) { return std::nullopt; }, + // Supply a lambda that stores the signature of a function into metadata, + // so that the types behind pointers can be accessed. + // Each function gets a !types metadata, which is a tuple with one element + // for a non-void return type and every argument. For primitive types it's + // a poison value of the type, for a pointer it's a metadata tuple with + // the addrspace and the referenced type. 
+ std::optional( + [&](Value *V, unsigned TypeID, GetTypeByIDTy GetTypeByID, + GetContainedTypeIDTy GetContainedTypeID) { + if (auto *F = dyn_cast(V)) { + auto *MD = getTypeMetadataEntry(TypeID, F->getContext(), + GetTypeByID, GetContainedTypeID); + F->setMetadata("types", cast(MD)); + } + })); + + if (!ModuleOrErr) + report_fatal_error("Could not parse bitcode module"); + std::unique_ptr M = std::move(ModuleOrErr.get()); + + EXPECT_EQ(mdToString(M->getFunction("func")->getMetadata("types")), + "!{!'function', !'void'}"); + EXPECT_EQ(mdToString(M->getFunction("func_header")->getMetadata("types")), + "!{!'function', i32}"); + EXPECT_EQ(mdToString(M->getFunction("ret_ptr")->getMetadata("types")), + "!{!'function', !{0, i8}}"); + EXPECT_EQ(mdToString(M->getFunction("ret_and_arg_ptr")->getMetadata("types")), + "!{!'function', !{0, i8}, !{8, i32}}"); + EXPECT_EQ(mdToString(M->getFunction("double_ptr")->getMetadata("types")), + "!{!'function', !{1, i8}, !{2, !{0, i32}}, !{0, !{0, !{0, i32}}}}"); +} + +// Test that when reading bitcode with typed pointers and upgrading them to +// opaque pointers, the type information of pointers in metadata can be +// extracted and stored in metadata. +TEST(BitReaderTest, AccessMetadataTypeInfo) { + SmallString<1024> Mem; + LLVMContext WriteContext; + writeModuleToBuffer( + parseAssembly(WriteContext, + "%dx.types.f32 = type { float }\n" + "declare void @main()\n" + "!md = !{!0}\n" + "!md2 = !{!1}\n" + "!0 = !{i32 2, %dx.types.f32 addrspace(1)* undef, void ()* " + "@main, void() addrspace(3)* null}\n" + "!1 = !{i8*(i32* addrspace(2)*) addrspace(4)* undef, " + "i32*** undef}\n"), + Mem); + + LLVMContext Context; + Context.setOpaquePointers(true); + + Expected> ModuleOrErr = parseBitcodeFile( + MemoryBufferRef(Mem.str(), "test"), Context, + [](StringRef) { return std::nullopt; }, std::nullopt, + // Supply a lambda that stores types from metadata, + // so that the types behind pointers can be accessed. + // Non-pointer entries are ignored. 
Values with a pointer type are + // replaced by a metadata tuple with {original value, type md}. We cannot + // save the metadata outside because after conversion to opaque pointers, + // entries are not distinguishable anymore (e.g. i32* and i8* are both + // upgraded to ptr). + std::optional( + [&](Metadata **Val, unsigned TypeID, GetTypeByIDTy GetTypeByID, + GetContainedTypeIDTy GetContainedTypeID) { + auto *OrigVal = cast(*Val); + if (OrigVal->getType()->isPointerTy()) { + // Ignore function references, their signature can be saved like + // in the test above + if (!isa(OrigVal->getValue())) { + SmallVector Tuple; + Tuple.push_back(OrigVal); + Tuple.push_back(getTypeMetadataEntry( + GetContainedTypeID(TypeID, 0), OrigVal->getContext(), + GetTypeByID, GetContainedTypeID)); + *Val = MDTuple::get(OrigVal->getContext(), Tuple); + } + } + })); + + if (!ModuleOrErr) + report_fatal_error("Could not parse bitcode module"); + std::unique_ptr M = std::move(ModuleOrErr.get()); + + EXPECT_EQ( + mdToString(M->getNamedMetadata("md")->getOperand(0)), + "!{2, !{ptr, %dx.types.f32}, ptr, !{ptr, !{!'function', !'void'}}}"); + EXPECT_EQ(mdToString(M->getNamedMetadata("md2")->getOperand(0)), + "!{!{ptr, !{!'function', !{0, i8}, !{2, !{0, i32}}}}, !{ptr, !{0, " + "!{0, i32}}}}"); +} + } // end namespace