diff --git a/llvm/include/llvm/Bitcode/LLVMBitCodes.h b/llvm/include/llvm/Bitcode/LLVMBitCodes.h --- a/llvm/include/llvm/Bitcode/LLVMBitCodes.h +++ b/llvm/include/llvm/Bitcode/LLVMBitCodes.h @@ -585,14 +585,15 @@ 52, // CATCHSWITCH: [num,args...] or [num,args...,bb] // 53 is unused. // 54 is unused. - FUNC_CODE_OPERAND_BUNDLE = 55, // OPERAND_BUNDLE: [tag#, value...] - FUNC_CODE_INST_UNOP = 56, // UNOP: [opcode, ty, opval] - FUNC_CODE_INST_CALLBR = 57, // CALLBR: [attr, cc, norm, transfs, - // fnty, fnid, args...] - FUNC_CODE_INST_FREEZE = 58, // FREEZE: [opty, opval] - FUNC_CODE_INST_ATOMICRMW = 59, // ATOMICRMW: [ptrty, ptr, valty, val, - // operation, align, vol, - // ordering, synchscope] + FUNC_CODE_OPERAND_BUNDLE = 55, // OPERAND_BUNDLE: [tag#, value...] + FUNC_CODE_INST_UNOP = 56, // UNOP: [opcode, ty, opval] + FUNC_CODE_INST_CALLBR = 57, // CALLBR: [attr, cc, norm, transfs, + // fnty, fnid, args...] + FUNC_CODE_INST_FREEZE = 58, // FREEZE: [opty, opval] + FUNC_CODE_INST_ATOMICRMW = 59, // ATOMICRMW: [ptrty, ptr, valty, val, + // operation, align, vol, + // ordering, synchscope] + FUNC_CODE_BLOCKADDR_USERS = 60, // BLOCKADDR_USERS: [value...] 
}; enum UseListCodes { diff --git a/llvm/lib/Bitcode/Reader/BitcodeAnalyzer.cpp b/llvm/lib/Bitcode/Reader/BitcodeAnalyzer.cpp --- a/llvm/lib/Bitcode/Reader/BitcodeAnalyzer.cpp +++ b/llvm/lib/Bitcode/Reader/BitcodeAnalyzer.cpp @@ -267,6 +267,7 @@ STRINGIFY_CODE(FUNC_CODE, INST_STOREATOMIC) STRINGIFY_CODE(FUNC_CODE, INST_CMPXCHG) STRINGIFY_CODE(FUNC_CODE, INST_CALLBR) + STRINGIFY_CODE(FUNC_CODE, BLOCKADDR_USERS) } case bitc::VALUE_SYMTAB_BLOCK_ID: switch (CodeID) { diff --git a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp --- a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp +++ b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp @@ -558,6 +558,13 @@ DenseMap> BasicBlockFwdRefs; std::deque BasicBlockFwdRefQueue; + /// These are Functions that contain BlockAddresses which refer to a different + /// Function. When parsing the different Function, queue Functions that refer + /// to the different Function. Those Functions must be materialized in order + /// to resolve their BlockAddress constants before the different Function + /// gets moved into another Module. + std::vector BackwardRefFunctions; + /// Indicates that we are using a new encoding for instruction operands where /// most operands in the current FUNCTION_BLOCK are encoded relative to the /// instruction number, for a more compact encoding. Some instruction @@ -881,6 +888,11 @@ } assert(BasicBlockFwdRefs.empty() && "Function missing from queue"); + for (Function *F : BackwardRefFunctions) + if (Error Err = materialize(F)) + return Err; + BackwardRefFunctions.clear(); + // Reset state. WillMaterializeAllForwardRefs = false; return Error::success(); @@ -4317,6 +4329,31 @@ continue; } + case bitc::FUNC_CODE_BLOCKADDR_USERS: // BLOCKADDR_USERS: [vals...] + // The record should not be emitted if it's an empty list. 
+ if (Record.empty()) + return error("Invalid record"); + // When we have the RARE case of a BlockAddress Constant that is not + // scoped to the Function it refers to, we need to conservatively + // materialize the referred to Function, regardless of whether or not + // that Function will ultimately be linked, otherwise users of + // BitcodeReader might start splicing out Function bodies such that we + // might no longer be able to materialize the BlockAddress since the + // BasicBlock (and entire body of the Function) the BlockAddress refers + // to may have been moved. In the case that the user of BitcodeReader + // decides ultimately not to link the Function body, materializing here + // could be considered wasteful, but it's better than a deserialization + // failure as described. This keeps BitcodeReader unaware of complex + // linkage policy decisions such as those used by LTO, leaving those + // decisions "one layer up." + for (uint64_t ValID : Record) + if (auto *F = dyn_cast(ValueList[ValID])) + BackwardRefFunctions.push_back(F); + else + return error("Invalid record"); + + continue; + case bitc::FUNC_CODE_DEBUG_LOC_AGAIN: // DEBUG_LOC_AGAIN // This record indicates that the last instruction is at the same // location as the previous instruction with a location. diff --git a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp --- a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp +++ b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp @@ -19,6 +19,7 @@ #include "llvm/ADT/None.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringMap.h" @@ -3359,8 +3360,10 @@ bool NeedsMetadataAttachment = F.hasMetadata(); DILocation *LastDL = nullptr; + SmallPtrSet BlockAddressUsers; + // Finally, emit all the instructions, in order. 
- for (const BasicBlock &BB : F) + for (const BasicBlock &BB : F) { for (const Instruction &I : BB) { writeInstruction(I, InstID, Vals); @@ -3392,6 +3395,25 @@ LastDL = DL; } + if (BlockAddress *BA = BlockAddress::lookup(&BB)) { + for (User *U : BA->users()) { + if (auto *I = dyn_cast(U)) { + Function *P = I->getParent()->getParent(); + if (P != &F) + BlockAddressUsers.insert(P); + } + } + } + } + + if (!BlockAddressUsers.empty()) { + SmallVector Record; + Record.reserve(BlockAddressUsers.size()); + for (Function *F : BlockAddressUsers) + Record.push_back(VE.getValueID(F)); + Stream.EmitRecord(bitc::FUNC_CODE_BLOCKADDR_USERS, Record); + } + // Emit names for all the instructions etc. if (auto *Symtab = F.getValueSymbolTable()) writeFunctionLevelValueSymbolTable(*Symtab); diff --git a/llvm/test/Bitcode/blockaddress-users.ll b/llvm/test/Bitcode/blockaddress-users.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Bitcode/blockaddress-users.ll @@ -0,0 +1,38 @@ +; RUN: llvm-as %s -o %t.bc +; RUN: llvm-bcanalyzer -dump %t.bc | FileCheck %s +; RUN: llvm-dis %t.bc + +; There's a curious case where blockaddress constants may refer to functions +; outside of the function they're used in. There's a special bitcode function +; code, FUNC_CODE_BLOCKADDR_USERS, used to signify that this is the case. + +; The intent of this test is two-fold: +; 1. Ensure we produce BLOCKADDR_USERS bitcode function code on the first fn, +; @repro, since @fun and @fun2 both refer to @repro via blockaddress +; constants. +; 2. Ensure we can round-trip serializing+deserializing such a case. + +; CHECK: