diff --git a/bolt/include/bolt/Core/BinaryContext.h b/bolt/include/bolt/Core/BinaryContext.h
--- a/bolt/include/bolt/Core/BinaryContext.h
+++ b/bolt/include/bolt/Core/BinaryContext.h
@@ -498,6 +498,14 @@
   /// to function \p BF.
   std::string generateJumpTableName(const BinaryFunction &BF, uint64_t Address);
 
+  /// Free memory used by jump table offsets.
+  void clearJumpTableOffsets() {
+    for (auto &JTI : JumpTables) {
+      JumpTable &JT = *JTI.second;
+      JumpTable::OffsetsType Temp;
+      Temp.swap(JT.OffsetEntries);
+    }
+  }
   /// Return true if the array of bytes represents a valid code padding.
   bool hasValidCodePadding(const BinaryFunction &BF);
 
diff --git a/bolt/lib/Core/BinaryContext.cpp b/bolt/lib/Core/BinaryContext.cpp
--- a/bolt/lib/Core/BinaryContext.cpp
+++ b/bolt/lib/Core/BinaryContext.cpp
@@ -764,12 +764,26 @@
 const MCSymbol *
 BinaryContext::getOrCreateJumpTable(BinaryFunction &Function, uint64_t Address,
                                     JumpTable::JumpTableType Type) {
+  // True iff Fragment is a split (cold) fragment whose parent is Parent.
+  auto isFragmentOf = [](BinaryFunction *Fragment, BinaryFunction *Parent) {
+    return (Fragment->isFragment() && Fragment->isParentFragment(Parent));
+  };
+
   if (JumpTable *JT = getJumpTableContainingAddress(Address)) {
     assert(JT->Type == Type && "jump table types have to match");
-    assert(JT->Parent == &Function &&
+    // A jump table may be shared between a function and its fragment(s); allow
+    // re-use in either direction, but reject unrelated functions.
+    assert((JT->Parent == &Function || isFragmentOf(JT->Parent, &Function) ||
+            isFragmentOf(&Function, JT->Parent)) &&
            "cannot re-use jump table of a different function");
     assert(Address == JT->getAddress() && "unexpected non-empty jump table");
+    // Flush OffsetEntries with INVALID_OFFSET if multiple parents.
+    // Duplicate the entry for the parent function for easy access.
+    if (isFragmentOf(JT->Parent, &Function) ||
+        isFragmentOf(&Function, JT->Parent)) {
+      constexpr uint64_t INVALID_OFFSET = std::numeric_limits<uint64_t>::max();
+      for (unsigned I = 0; I < JT->OffsetEntries.size(); ++I)
+        JT->OffsetEntries[I] = INVALID_OFFSET;
+      Function.JumpTables.emplace(Address, JT);
+    }
 
     return JT->getFirstLabel();
   }
 
diff --git a/bolt/lib/Core/BinaryFunction.cpp b/bolt/lib/Core/BinaryFunction.cpp
--- a/bolt/lib/Core/BinaryFunction.cpp
+++ b/bolt/lib/Core/BinaryFunction.cpp
@@ -1654,11 +1654,13 @@
            << "detected in function " << *this << '\n';
   }
 
-  for (unsigned I = 0; I < JT.OffsetEntries.size(); ++I) {
-    MCSymbol *Label =
-        getOrCreateLocalLabel(getAddress() + JT.OffsetEntries[I],
-                              /*CreatePastEnd*/ true);
-    JT.Entries.push_back(Label);
+  if (JT.Entries.empty()) {
+    for (unsigned I = 0; I < JT.OffsetEntries.size(); ++I) {
+      MCSymbol *Label =
+          getOrCreateLocalLabel(getAddress() + JT.OffsetEntries[I],
+                                /*CreatePastEnd*/ true);
+      JT.Entries.push_back(Label);
+    }
   }
 
   const uint64_t BDSize =
@@ -1700,12 +1702,6 @@
   }
   clearList(JTSites);
 
-  // Free memory used by jump table offsets.
-  for (auto &JTI : JumpTables) {
-    JumpTable &JT = *JTI.second;
-    clearList(JT.OffsetEntries);
-  }
-
   // Conservatively populate all possible destinations for unknown indirect
   // branches.
   if (opts::StrictMode && hasInternalReference()) {
diff --git a/bolt/lib/Rewrite/RewriteInstance.cpp b/bolt/lib/Rewrite/RewriteInstance.cpp
--- a/bolt/lib/Rewrite/RewriteInstance.cpp
+++ b/bolt/lib/Rewrite/RewriteInstance.cpp
@@ -2903,6 +2903,7 @@
     BC->processInterproceduralReferences(Function);
   }
 
+  BC->clearJumpTableOffsets();
   BC->populateJumpTables();
   BC->skipMarkedFragments();
 
diff --git a/bolt/test/X86/split-func-jump-table-fragment-bidirection.s b/bolt/test/X86/split-func-jump-table-fragment-bidirection.s
new file mode 100644
--- /dev/null
+++ b/bolt/test/X86/split-func-jump-table-fragment-bidirection.s
@@ -0,0 +1,78 @@
+# This reproduces an issue where two cold fragments are folded into one, so the
+# fragment has two parents.
+# The fragment is only reachable through a jump table, so all functions must be
+# ignored.
+
+# REQUIRES: system-linux
+
+# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown %s -o %t.o
+# RUN: llvm-strip --strip-unneeded %t.o
+# RUN: %clang %cflags %t.o -o %t.exe -Wl,-q
+# RUN: llvm-bolt %t.exe -o %t.out -v=1 --print-disasm 2>&1
+
+  .text
+  .globl main
+  .type main, %function
+  .p2align 2
+main:
+LBB0:
+  andl $0xf, %ecx
+  cmpb $0x4, %cl
+  # exit through ret
+  ja LBB3
+
+# jump table dispatch, jumping to label indexed by val in %ecx
+LBB1:
+  leaq JUMP_TABLE1(%rip), %r8
+  movzbl %cl, %ecx
+  movslq (%r8,%rcx,4), %rax
+  addq %rax, %r8
+  jmpq *%r8
+
+LBB2:
+  xorq %rax, %rax
+LBB3:
+  addq $0x8, %rsp
+  ret
+.size main, .-main
+
+# cold fragment is only reachable via the jump table
+  .globl main.cold.1
+  .type main.cold.1, %function
+  .p2align 2
+main.cold.1:
+  # load bearing nop: pad LBB8 so that it can't be treated
+  # as __builtin_unreachable by analyzeJumpTable
+  nop
+LBB4:
+  andl $0xb, %ebx
+  cmpb $0x1, %cl
+  # exit through ret
+  ja LBB7
+
+# jump table dispatch, jumping to label indexed by val in %ecx
+LBB5:
+  leaq JUMP_TABLE1(%rip), %r8
+  movzbl %cl, %ecx
+  movslq (%r8,%rcx,4), %rax
+  addq %rax, %r8
+  jmpq *%r8
+
+LBB6:
+  xorq %rax, %rax
+LBB7:
+  addq $0x8, %rsp
+  ret
+LBB8:
+  callq abort
+.size main.cold.1, .-main.cold.1
+
+  .rodata
+# jmp table, entries must be R_X86_64_PC32 relocs
+  .globl JUMP_TABLE1
+JUMP_TABLE1:
+  .long LBB2-JUMP_TABLE1
+  .long LBB3-JUMP_TABLE1
+  .long LBB8-JUMP_TABLE1
+  .long LBB6-JUMP_TABLE1
+  .long LBB7-JUMP_TABLE1