diff --git a/llvm/include/llvm/Bitcode/LLVMBitCodes.h b/llvm/include/llvm/Bitcode/LLVMBitCodes.h
--- a/llvm/include/llvm/Bitcode/LLVMBitCodes.h
+++ b/llvm/include/llvm/Bitcode/LLVMBitCodes.h
@@ -585,14 +585,15 @@
       52, // CATCHSWITCH: [num,args...] or [num,args...,bb]
   // 53 is unused.
   // 54 is unused.
-  FUNC_CODE_OPERAND_BUNDLE = 55, // OPERAND_BUNDLE: [tag#, value...]
-  FUNC_CODE_INST_UNOP = 56,      // UNOP: [opcode, ty, opval]
-  FUNC_CODE_INST_CALLBR = 57,    // CALLBR: [attr, cc, norm, transfs,
-                                 //          fnty, fnid, args...]
-  FUNC_CODE_INST_FREEZE = 58,    // FREEZE: [opty, opval]
-  FUNC_CODE_INST_ATOMICRMW = 59, // ATOMICRMW: [ptrty, ptr, valty, val,
-                                 //            operation, align, vol,
-                                 //            ordering, synchscope]
+  FUNC_CODE_OPERAND_BUNDLE = 55,  // OPERAND_BUNDLE: [tag#, value...]
+  FUNC_CODE_INST_UNOP = 56,       // UNOP: [opcode, ty, opval]
+  FUNC_CODE_INST_CALLBR = 57,     // CALLBR: [attr, cc, norm, transfs,
+                                  //          fnty, fnid, args...]
+  FUNC_CODE_INST_FREEZE = 58,     // FREEZE: [opty, opval]
+  FUNC_CODE_INST_ATOMICRMW = 59,  // ATOMICRMW: [ptrty, ptr, valty, val,
+                                  //            operation, align, vol,
+                                  //            ordering, synchscope]
+  FUNC_CODE_BLOCKADDR_USERS = 60, // BLOCKADDR_USERS: [value...]
 };
 
 enum UseListCodes {
diff --git a/llvm/lib/Bitcode/Reader/BitcodeAnalyzer.cpp b/llvm/lib/Bitcode/Reader/BitcodeAnalyzer.cpp
--- a/llvm/lib/Bitcode/Reader/BitcodeAnalyzer.cpp
+++ b/llvm/lib/Bitcode/Reader/BitcodeAnalyzer.cpp
@@ -267,6 +267,7 @@
       STRINGIFY_CODE(FUNC_CODE, INST_STOREATOMIC)
       STRINGIFY_CODE(FUNC_CODE, INST_CMPXCHG)
      STRINGIFY_CODE(FUNC_CODE, INST_CALLBR)
+      STRINGIFY_CODE(FUNC_CODE, BLOCKADDR_USERS)
     }
   case bitc::VALUE_SYMTAB_BLOCK_ID:
     switch (CodeID) {
diff --git a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
--- a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
+++ b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
@@ -558,6 +558,13 @@
   DenseMap<Function *, std::vector<BasicBlock *>> BasicBlockFwdRefs;
   std::deque<Function *> BasicBlockFwdRefQueue;
 
+  /// These are Functions that contain BlockAddresses which refer to a
+  /// different Function. When parsing that other Function, queue the
+  /// Functions that refer to it. Those Functions must be materialized in
+  /// order to resolve their BlockAddress constants before the Function they
+  /// refer to gets moved into another Module.
+  std::vector<Function *> BackwardRefFunctions;
+
   /// Indicates that we are using a new encoding for instruction operands where
   /// most operands in the current FUNCTION_BLOCK are encoded relative to the
   /// instruction number, for a more compact encoding. Some instruction
@@ -881,6 +888,11 @@
   }
   assert(BasicBlockFwdRefs.empty() && "Function missing from queue");
 
+  for (Function *F : BackwardRefFunctions)
+    if (Error Err = materialize(F))
+      return Err;
+  BackwardRefFunctions.clear();
+
   // Reset state.
   WillMaterializeAllForwardRefs = false;
   return Error::success();
@@ -4317,6 +4329,31 @@
       continue;
     }
 
+    case bitc::FUNC_CODE_BLOCKADDR_USERS: // BLOCKADDR_USERS: [vals...]
+      // The record should not be emitted if it's an empty list.
+      if (Record.empty())
+        return error("Invalid record");
+      // When we have the RARE case of a BlockAddress Constant that is not
+      // scoped to the Function it refers to, we need to conservatively
+      // materialize the referred-to Function, regardless of whether or not
+      // that Function will ultimately be linked. Otherwise, users of
+      // BitcodeReader might start splicing out Function bodies such that we
+      // might no longer be able to materialize the BlockAddress since the
+      // BasicBlock (and entire body of the Function) the BlockAddress refers
+      // to may have been moved. In the case that the user of BitcodeReader
+      // decides ultimately not to link the Function body, materializing here
+      // could be considered wasteful, but it's better than a deserialization
+      // failure as described. This keeps BitcodeReader unaware of complex
+      // linkage policy decisions such as those used by LTO, leaving those
+      // decisions "one layer up."
+      for (uint64_t ValID : Record)
+        if (auto *F = dyn_cast<Function>(ValueList[ValID]))
+          BackwardRefFunctions.push_back(F);
+        else
+          return error("Invalid record");
+
+      continue;
+
     case bitc::FUNC_CODE_DEBUG_LOC_AGAIN: // DEBUG_LOC_AGAIN
       // This record indicates that the last instruction is at the same
       // location as the previous instruction with a location.
diff --git a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
--- a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
+++ b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
@@ -19,6 +19,7 @@
 #include "llvm/ADT/None.h"
 #include "llvm/ADT/Optional.h"
 #include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringMap.h"
@@ -3359,8 +3360,10 @@
   bool NeedsMetadataAttachment = F.hasMetadata();
 
   DILocation *LastDL = nullptr;
+  SmallPtrSet<Function *, 4> BlockAddressUsers;
+
   // Finally, emit all the instructions, in order.
-  for (const BasicBlock &BB : F)
+  for (const BasicBlock &BB : F) {
     for (const Instruction &I : BB) {
       writeInstruction(I, InstID, Vals);
 
@@ -3392,6 +3395,25 @@
       LastDL = DL;
     }
 
+    if (BlockAddress *BA = BlockAddress::lookup(&BB)) {
+      for (User *U : BA->users()) {
+        if (auto *I = dyn_cast<Instruction>(U)) {
+          Function *P = I->getParent()->getParent();
+          if (P != &F)
+            BlockAddressUsers.insert(P);
+        }
+      }
+    }
+  }
+
+  if (!BlockAddressUsers.empty()) {
+    SmallVector<uint64_t, 4> Record;
+    Record.reserve(BlockAddressUsers.size());
+    for (Function *F : BlockAddressUsers)
+      Record.push_back(VE.getValueID(F));
+    Stream.EmitRecord(bitc::FUNC_CODE_BLOCKADDR_USERS, Record);
+  }
+
   // Emit names for all the instructions etc.
   if (auto *Symtab = F.getValueSymbolTable())
     writeFunctionLevelValueSymbolTable(*Symtab);
diff --git a/llvm/test/Bitcode/blockaddress-users.ll b/llvm/test/Bitcode/blockaddress-users.ll
new file
--- /dev/null
+++ b/llvm/test/Bitcode/blockaddress-users.ll
@@ -0,0 +1,38 @@
+; RUN: llvm-as %s -o %t.bc
+; RUN: llvm-bcanalyzer -dump %t.bc | FileCheck %s
+; RUN: llvm-dis %t.bc
+
+; There's a curious case where blockaddress constants may refer to functions
+; outside of the function they're used in. There's a special bitcode function
+; code, FUNC_CODE_BLOCKADDR_USERS, used to signify that this is the case.
+
+; The intent of this test is two-fold:
+; 1. Ensure we produce BLOCKADDR_USERS bitcode function code on the first fn,
+;    @repro, since @fun and @fun2 both refer to @repro via blockaddress
+;    constants.
+; 2. Ensure we can round-trip serializing+deserializing such a case.
+
+; CHECK: <FUNCTION_BLOCK
+; CHECK: <BLOCKADDR_USERS op0=2 op1=1
+; CHECK: <FUNCTION_BLOCK
+; CHECK: <FUNCTION_BLOCK
+
+define void @repro() {
+  br label %label
+
+label:
+  call void @fun()
+  ret void
+}
+
+define void @fun() noinline {
+  call void @f(i8* blockaddress(@repro, %label))
+  ret void
+}
+
+define void @fun2() noinline {
+  call void @f(i8* blockaddress(@repro, %label))
+  ret void
+}
+
+declare void @f(i8*)
diff --git a/llvm/test/Linker/blockaddress.ll b/llvm/test/Linker/blockaddress.ll
new file
--- /dev/null
+++ b/llvm/test/Linker/blockaddress.ll
@@ -0,0 +1,125 @@
+; RUN: llvm-as %s -o %t.bc
+; RUN: llvm-link %t.bc -S | FileCheck %s
+
+declare void @f(i8*)
+
+; Test that a blockaddress in @y referring to %label in @x can be moved when @y
+; appears after @x.
+define void @x() {
+  br label %label
+
+label:
+  call void @y()
+  ret void
+}
+
+define void @y() {
+; CHECK: define void @y() {
+; CHECK-NEXT: call void @f(i8* blockaddress(@x, %label))
+  call void @f(i8* blockaddress(@x, %label))
+  ret void
+}
+
+; Test that a blockaddress in @a referring to %label in @b can be moved when @a
+; appears before @b.
+define void @a() {
+; CHECK: define void @a() {
+; CHECK-NEXT: call void @f(i8* blockaddress(@b, %label))
+  call void @f(i8* blockaddress(@b, %label))
+  ret void
+}
+
+define void @b() {
+  br label %label
+
+label:
+  call void @a()
+  ret void
+}
+
+; Test that @c and @d can both have blockaddress Constants that refer to one
+; another.
+
+define void @c() {
+; CHECK: define void @c() {
+; CHECK-NEXT: br label %label
+; CHECK-EMPTY:
+; CHECK-NEXT: label:
+; CHECK-NEXT: call void @f(i8* blockaddress(@d, %label))
+  br label %label
+
+label:
+  call void @f(i8* blockaddress(@d, %label))
+  ret void
+}
+
+define void @d() {
+; CHECK: define void @d() {
+; CHECK-NEXT: br label %label
+; CHECK-EMPTY:
+; CHECK-NEXT: label:
+; CHECK-NEXT: call void @f(i8* blockaddress(@c, %label))
+  br label %label
+
+label:
+  call void @f(i8* blockaddress(@c, %label))
+  ret void
+}
+
+; Test that Functions added to IRLinker's Worklist member lazily (linkonce_odr)
+; aren't susceptible to the same issues as @x/@y above.
+define void @parsed() {
+  br label %label
+
+label:
+  ret void
+}
+
+define linkonce_odr void @lazy() {
+; CHECK: define linkonce_odr void @lazy() {
+; CHECK-NEXT: br label %label
+; CHECK-EMPTY:
+; CHECK-NEXT: label:
+; CHECK-NEXT: call void @f(i8* blockaddress(@parsed, %label))
+  br label %label
+
+label:
+  call void @f(i8* blockaddress(@parsed, %label))
+  ret void
+}
+
+define void @parsed2() {
+  call void @lazy()
+  ret void
+}
+
+; Same test as @lazy, just with one more level of lazily parsed functions.
+define void @parsed3() {
+  br label %label
+
+label:
+  ret void
+}
+
+define linkonce_odr void @lazy1() {
+; CHECK: define linkonce_odr void @lazy1() {
+; CHECK-NEXT: br label %label
+; CHECK-EMPTY:
+; CHECK-NEXT: label:
+; CHECK-NEXT: call void @f(i8* blockaddress(@parsed3, %label))
+  br label %label
+
+label:
+  call void @f(i8* blockaddress(@parsed3, %label))
+  ret void
+}
+
+define linkonce_odr void @lazy2() {
+  call void @lazy1()
+  ret void
+}
+
+define void @parsed4() {
+  call void @lazy2()
+  ret void
+}