diff --git a/llvm/include/llvm/Bitcode/BitcodeReader.h b/llvm/include/llvm/Bitcode/BitcodeReader.h --- a/llvm/include/llvm/Bitcode/BitcodeReader.h +++ b/llvm/include/llvm/Bitcode/BitcodeReader.h @@ -27,14 +27,24 @@ #include namespace llvm { +class Function; class LLVMContext; class Module; class MemoryBuffer; class ModuleSummaryIndex; +class Type; typedef llvm::function_ref(StringRef)> DataLayoutCallbackTy; +typedef llvm::function_ref GetTypeByIDTy; + +typedef llvm::function_ref GetContainedTypeIDTy; + +typedef llvm::function_ref + FunctionTypeCallbackTy; + // These functions are for converting Expected/Error values to // ErrorOr/std::error_code for compatibility with legacy clients. FIXME: // Remove these functions once no longer needed by the C and libLTO APIs. @@ -84,7 +94,8 @@ Expected> getModuleImpl(LLVMContext &Context, bool MaterializeAll, bool ShouldLazyLoadMetadata, bool IsImporting, - DataLayoutCallbackTy DataLayoutCallback); + DataLayoutCallbackTy DataLayoutCallback, + FunctionTypeCallbackTy FunctionTypeCallback); public: StringRef getBuffer() const { @@ -105,8 +116,11 @@ /// Read the entire bitcode module and return it. Expected> parseModule( - LLVMContext &Context, DataLayoutCallbackTy DataLayoutCallback = - [](StringRef) { return None; }); + LLVMContext &Context, + DataLayoutCallbackTy DataLayoutCallback = + [](StringRef) { return None; }, + FunctionTypeCallbackTy FunctionTypeCallback = + [](Function *, unsigned, GetTypeByIDTy, GetContainedTypeIDTy) {}); /// Returns information about the module to be used for LTO: whether to /// compile with ThinLTO, and whether it has a summary. @@ -169,11 +183,18 @@ Expected getBitcodeProducerString(MemoryBufferRef Buffer); /// Read the specified bitcode file, returning the module. + /// The FunctionTypeCallback is called for every function definition or + /// declaration and allows accessing the type information, also behind + /// pointers. 
This can be useful when the opaque pointer upgrade erases all + /// type information behind pointers. + /// The second argument to FunctionTypeCallback is the type ID of the + /// function; the two callbacks passed after it can be used to extract type + /// information. Expected> parseBitcodeFile( MemoryBufferRef Buffer, LLVMContext &Context, - DataLayoutCallbackTy DataLayoutCallback = [](StringRef) { - return None; - }); + DataLayoutCallbackTy DataLayoutCallback = [](StringRef) { return None; }, + FunctionTypeCallbackTy FunctionTypeCallback = + [](Function *, unsigned, GetTypeByIDTy, GetContainedTypeIDTy) {}); /// Returns LTO information for the specified bitcode file. Expected getBitcodeLTOInfo(MemoryBufferRef Buffer); diff --git a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp --- a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp +++ b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp @@ -583,6 +583,8 @@ std::vector BundleTags; SmallVector SSIDs; + Optional FunctionTypeCallback; + public: BitcodeReader(BitstreamCursor Stream, StringRef Strtab, StringRef ProducerIdentification, LLVMContext &Context); @@ -597,7 +599,9 @@ /// \returns true if an error occurred. 
Error parseBitcodeInto( Module *M, bool ShouldLazyLoadMetadata = false, bool IsImporting = false, - DataLayoutCallbackTy DataLayoutCallback = [](StringRef) { return None; }); + DataLayoutCallbackTy DataLayoutCallback = [](StringRef) { return None; }, + FunctionTypeCallbackTy FunctionTypeCallback = + [](Function *, unsigned, GetTypeByIDTy, GetContainedTypeIDTy) {}); static uint64_t decodeSignRotatedValue(uint64_t V); @@ -618,6 +622,7 @@ unsigned getContainedTypeID(unsigned ID, unsigned Idx = 0); unsigned getVirtualTypeID(Type *Ty, ArrayRef ContainedTypeIDs = {}); + void callFunctionTypeCallback(Function *F, unsigned TypeID); Value *getFnValueByID(unsigned ID, Type *Ty, unsigned TyID) { if (Ty && Ty->isMetadataTy()) return MetadataAsValue::get(Ty->getContext(), getFnMetadataByID(ID)); @@ -720,7 +725,9 @@ Error parseAttrKind(uint64_t Code, Attribute::AttrKind *Kind); Error parseModule( uint64_t ResumeBit, bool ShouldLazyLoadMetadata = false, - DataLayoutCallbackTy DataLayoutCallback = [](StringRef) { return None; }); + DataLayoutCallbackTy DataLayoutCallback = [](StringRef) { return None; }, + FunctionTypeCallbackTy FunctionTypeCallback = + [](Function *, unsigned, GetTypeByIDTy, GetContainedTypeIDTy) {}); Error parseComdatRecord(ArrayRef Record); Error parseGlobalVarRecord(ArrayRef Record); @@ -3566,6 +3573,14 @@ return Error::success(); } +void BitcodeReader::callFunctionTypeCallback(Function *F, unsigned TypeID) { + if (FunctionTypeCallback) { + (*FunctionTypeCallback)( + F, TypeID, [this](unsigned I) { return getTypeByID(I); }, + [this](unsigned I, unsigned J) { return getContainedTypeID(I, J); }); + } +} + Error BitcodeReader::parseFunctionRecord(ArrayRef Record) { // v1: [type, callingconv, isproto, linkage, paramattr, alignment, section, // visibility, gc, unnamed_addr, prologuedata, dllstorageclass, comdat, @@ -3610,6 +3625,7 @@ uint64_t RawLinkage = Record[3]; Func->setLinkage(getDecodedLinkage(RawLinkage)); Func->setAttributes(getAttributes(Record[4])); + 
callFunctionTypeCallback(Func, FTyID); // Upgrade any old-style byval or sret without a type by propagating the // argument's pointee type. There should be no opaque pointers where the byval @@ -3819,7 +3835,9 @@ Error BitcodeReader::parseModule(uint64_t ResumeBit, bool ShouldLazyLoadMetadata, - DataLayoutCallbackTy DataLayoutCallback) { + DataLayoutCallbackTy DataLayoutCallback, + FunctionTypeCallbackTy FunctionTypeCallback) { + this->FunctionTypeCallback = FunctionTypeCallback; if (ResumeBit) { if (Error JumpFailed = Stream.JumpToBit(ResumeBit)) return JumpFailed; @@ -4104,15 +4122,18 @@ } Record.clear(); } + this->FunctionTypeCallback = None; } -Error BitcodeReader::parseBitcodeInto(Module *M, bool ShouldLazyLoadMetadata, - bool IsImporting, - DataLayoutCallbackTy DataLayoutCallback) { +Error BitcodeReader::parseBitcodeInto( + Module *M, bool ShouldLazyLoadMetadata, bool IsImporting, + DataLayoutCallbackTy DataLayoutCallback, + FunctionTypeCallbackTy FunctionTypeCallback) { TheModule = M; MDLoader = MetadataLoader(Stream, *M, ValueList, IsImporting, [&](unsigned ID) { return getTypeByID(ID); }); - return parseModule(0, ShouldLazyLoadMetadata, DataLayoutCallback); + return parseModule(0, ShouldLazyLoadMetadata, DataLayoutCallback, + FunctionTypeCallback); } Error BitcodeReader::typeCheckLoadStoreInst(Type *ValType, Type *PtrType) { @@ -7328,7 +7349,8 @@ Expected> BitcodeModule::getModuleImpl(LLVMContext &Context, bool MaterializeAll, bool ShouldLazyLoadMetadata, bool IsImporting, - DataLayoutCallbackTy DataLayoutCallback) { + DataLayoutCallbackTy DataLayoutCallback, + FunctionTypeCallbackTy FunctionTypeCallback) { BitstreamCursor Stream(Buffer); std::string ProducerIdentification; @@ -7350,8 +7372,9 @@ M->setMaterializer(R); // Delay parsing Metadata if ShouldLazyLoadMetadata is true. 
- if (Error Err = R->parseBitcodeInto(M.get(), ShouldLazyLoadMetadata, - IsImporting, DataLayoutCallback)) + if (Error Err = + R->parseBitcodeInto(M.get(), ShouldLazyLoadMetadata, IsImporting, + DataLayoutCallback, FunctionTypeCallback)) return std::move(Err); if (MaterializeAll) { @@ -7369,8 +7392,10 @@ Expected> BitcodeModule::getLazyModule(LLVMContext &Context, bool ShouldLazyLoadMetadata, bool IsImporting) { - return getModuleImpl(Context, false, ShouldLazyLoadMetadata, IsImporting, - [](StringRef) { return None; }); + return getModuleImpl( + Context, false, ShouldLazyLoadMetadata, IsImporting, + [](StringRef) { return None; }, + [](Function *, unsigned, GetTypeByIDTy, GetContainedTypeIDTy) {}); } // Parse the specified bitcode buffer and merge the index into CombinedIndex. @@ -7535,20 +7560,23 @@ Expected> BitcodeModule::parseModule(LLVMContext &Context, - DataLayoutCallbackTy DataLayoutCallback) { - return getModuleImpl(Context, true, false, false, DataLayoutCallback); + DataLayoutCallbackTy DataLayoutCallback, + FunctionTypeCallbackTy FunctionTypeCallback) { + return getModuleImpl(Context, true, false, false, DataLayoutCallback, + FunctionTypeCallback); // TODO: Restore the use-lists to the in-memory state when the bitcode was // written. We must defer until the Module has been fully materialized. 
} Expected> llvm::parseBitcodeFile(MemoryBufferRef Buffer, LLVMContext &Context, - DataLayoutCallbackTy DataLayoutCallback) { + DataLayoutCallbackTy DataLayoutCallback, + FunctionTypeCallbackTy FunctionTypeCallback) { Expected BM = getSingleModule(Buffer); if (!BM) return BM.takeError(); - return BM->parseModule(Context, DataLayoutCallback); + return BM->parseModule(Context, DataLayoutCallback, FunctionTypeCallback); } Expected llvm::getBitcodeTargetTriple(MemoryBufferRef Buffer) { diff --git a/llvm/unittests/Bitcode/BitReaderTest.cpp b/llvm/unittests/Bitcode/BitReaderTest.cpp --- a/llvm/unittests/Bitcode/BitReaderTest.cpp +++ b/llvm/unittests/Bitcode/BitReaderTest.cpp @@ -11,6 +11,7 @@ #include "llvm/AsmParser/Parser.h" #include "llvm/Bitcode/BitcodeReader.h" #include "llvm/Bitcode/BitcodeWriter.h" +#include "llvm/IR/Constants.h" #include "llvm/IR/InstrTypes.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" @@ -255,4 +256,105 @@ EXPECT_FALSE(verifyModule(*M, &dbgs())); } +// Helper function to convert type metadata to a string for testing +static std::string mdToString(Metadata *MD) { + std::string S; + if (auto *TMD = dyn_cast(MD)) { + S += "!{"; + for (unsigned I = 0; I < TMD->getNumOperands(); I++) { + if (I != 0) + S += ", "; + S += mdToString(TMD->getOperand(I).get()); + } + S += "}"; + } else if (auto *I = mdconst::dyn_extract(MD)) { + S += std::to_string(I->getZExtValue()); + } else if (auto *P = mdconst::dyn_extract(MD)) { + auto *Ty = P->getType(); + assert(Ty->isIntegerTy()); + S += "i"; + S += std::to_string(Ty->getIntegerBitWidth()); + } else { + llvm_unreachable("unhandled metadata"); + } + return S; +} + +// Test that when reading bitcode with typed pointers and upgrading them to +// opaque pointers, the type information of function signatures can be extracted +// and stored e.g. in metadata. 
+TEST(BitReaderTest, AccessBitcodeTypeInfo) { + SmallString<1024> Mem; + LLVMContext WriteContext; + writeModuleToBuffer( + parseAssembly( + WriteContext, + "define void @func() {\n" + " unreachable\n" + "}\n" + "declare i32 @func_header()\n" + "declare i8* @ret_ptr()\n" + "declare i8* @ret_and_arg_ptr(i32 addrspace(8)*)\n" + "declare i8 addrspace(1)* @double_ptr(i32* addrspace(2)*, i32***)\n"), + Mem); + + LLVMContext Context; + Context.setOpaquePointers(true); + + Expected> ModuleOrErr = parseBitcodeFile( + MemoryBufferRef(Mem.str(), "test"), Context, + [](StringRef) { return None; }, + // Supply a lambda that stores the signature of a function into metadata, + // so that the types behind pointers can be accessed. + // Each function gets a !types metadata, which is a tuple with one element + // for a non-void return type and every argument. For primitive types it's + // a poison value of the type, for a pointer it's a metadata tuple with + // the addrspace and the referenced type. + [&](Function *F, unsigned TypeID, GetTypeByIDTy GetTypeByID, + GetContainedTypeIDTy GetContainedTypeID) { + // Recursively look into a (pointer) type and its pointee type + std::function GetTypeMetadataEntry; + GetTypeMetadataEntry = [&](unsigned TypeID) -> Metadata * { + Type *Ty = GetTypeByID(TypeID); + if (!Ty->isPointerTy()) + return ConstantAsMetadata::get(PoisonValue::get(Ty)); + + // Return !{addrspace, pointee type} for pointer + SmallVector MD; + MD.push_back(ConstantAsMetadata::get(ConstantInt::get( + Type::getInt32Ty(Context), Ty->getPointerAddressSpace()))); + MD.push_back(GetTypeMetadataEntry(GetContainedTypeID(TypeID, 0))); + return MDTuple::get(Context, MD); + }; + + // Save the function signature as metadata + SmallVector SignatureMD; + // Return type + if (!F->getFunctionType()->getReturnType()->isVoidTy()) + SignatureMD.push_back( + GetTypeMetadataEntry(GetContainedTypeID(TypeID, 0))); + // Arguments + for (unsigned I = 0; I != F->arg_size(); ++I) + SignatureMD.push_back( + 
GetTypeMetadataEntry(GetContainedTypeID(TypeID, I + 1))); + + MDNode *MD = MDTuple::get(Context, SignatureMD); + F->setMetadata("types", MD); + }); + + if (!ModuleOrErr) + report_fatal_error("Could not parse bitcode module"); + std::unique_ptr M = std::move(ModuleOrErr.get()); + + EXPECT_EQ(mdToString(M->getFunction("func")->getMetadata("types")), "!{}"); + EXPECT_EQ(mdToString(M->getFunction("func_header")->getMetadata("types")), + "!{i32}"); + EXPECT_EQ(mdToString(M->getFunction("ret_ptr")->getMetadata("types")), + "!{!{0, i8}}"); + EXPECT_EQ(mdToString(M->getFunction("ret_and_arg_ptr")->getMetadata("types")), + "!{!{0, i8}, !{8, i32}}"); + EXPECT_EQ(mdToString(M->getFunction("double_ptr")->getMetadata("types")), + "!{!{1, i8}, !{2, !{0, i32}}, !{0, !{0, !{0, i32}}}}"); +} + } // end namespace