diff --git a/bolt/lib/Rewrite/LinuxKernelRewriter.cpp b/bolt/lib/Rewrite/LinuxKernelRewriter.cpp
--- a/bolt/lib/Rewrite/LinuxKernelRewriter.cpp
+++ b/bolt/lib/Rewrite/LinuxKernelRewriter.cpp
@@ -21,11 +21,17 @@
 using namespace bolt;
 
 namespace opts {
+
 static cl::opt<bool>
     PrintORC("print-orc",
              cl::desc("print ORC unwind information for instructions"),
-             cl::init(true), cl::cat(BoltCategory));
-}
+             cl::init(true), cl::Hidden, cl::cat(BoltCategory));
+
+static cl::opt<bool>
+    DumpORC("dump-orc", cl::desc("dump raw ORC unwind information (sorted)"),
+            cl::init(false), cl::Hidden, cl::cat(BoltCategory));
+
+} // namespace opts
 
 /// Linux Kernel supports stack unwinding using ORC (oops rewind capability).
 /// ORC state at every IP can be described by the following data structure.
@@ -83,6 +89,16 @@
     uint64_t IP;        /// Instruction address.
     BinaryFunction *BF; /// Binary function corresponding to the entry.
     ORCState ORC;       /// Stack unwind info in ORC format.
+
+    /// Order entries by IP. Among entries with the same IP, a NullORC entry
+    /// precedes a non-null one; otherwise they are equivalent. Both operands
+    /// are checked so that two NullORC entries never compare less than each
+    /// other, which the strict weak ordering required by llvm::sort() forbids.
+    bool operator<(const ORCListEntry &Other) const {
+      if (IP != Other.IP)
+        return IP < Other.IP;
+      return ORC == NullORC && Other.ORC != NullORC;
+    }
   };
 
   using ORCListType = std::vector<ORCListEntry>;
@@ -463,10 +479,22 @@
                                      BC.AsmInfo->getCodePointerSize());
   DataExtractor::Cursor ORCCursor(0);
   DataExtractor::Cursor IPCursor(0);
+  uint64_t PrevIP = 0;
   for (uint32_t Index = 0; Index < NumEntries; ++Index) {
     const uint64_t IP =
         IPSectionAddress + IPCursor.tell() + (int32_t)IPDE.getU32(IPCursor);
 
+    // Consume the status of the cursor.
+    if (!IPCursor)
+      return createStringError(errc::executable_format_error,
+                               "out of bounds while reading ORC IP table");
+
+    if (IP < PrevIP && opts::Verbosity)
+      errs() << "BOLT-WARNING: out of order IP 0x" << Twine::utohexstr(IP)
+             << " detected while reading ORC\n";
+
+    PrevIP = IP;
+
     // Store all entries, includes those we are not going to update as the
     // tables need to be sorted globally before being written out.
     ORCEntries.push_back(ORCListEntry());
@@ -477,8 +505,8 @@
     Entry.ORC.BPOffset = (int16_t)OrcDE.getU16(ORCCursor);
     Entry.ORC.Info = (int16_t)OrcDE.getU16(ORCCursor);
 
-    // Consume the status of cursors.
-    if (!IPCursor || !ORCCursor)
+    // Consume the status of the cursor.
+    if (!ORCCursor)
       return createStringError(errc::executable_format_error,
                                "out of bounds while reading ORC");
 
@@ -502,7 +530,12 @@
       continue;
     }
 
-    if (!BC.shouldEmit(*BF) || Entry.ORC == NullORC)
+    if (Entry.ORC == NullORC)
+      continue;
+
+    BF->setHasORC(true);
+
+    if (!BF->hasInstructions())
       continue;
 
     MCInst *Inst = BF->getInstructionAtOffset(IP - BF->getAddress());
@@ -520,8 +553,20 @@
           "duplicate non-terminal ORC IP 0x%" PRIx64 " in .orc_unwind_ip", IP);
 
     BC.MIB->addAnnotation(*Inst, "ORC", Entry.ORC);
+  }
 
-    BF->setHasORC(true);
+  // Older kernels could contain unsorted tables in the file as the tables were
+  // sorted during boot time.
+  llvm::sort(ORCEntries);
+
+  if (opts::DumpORC) {
+    outs() << "BOLT-INFO: ORC unwind information:\n";
+    for (const ORCListEntry &E : ORCEntries) {
+      outs() << "0x" << Twine::utohexstr(E.IP) << ": " << E.ORC;
+      if (E.BF)
+        outs() << ": " << *E.BF;
+      outs() << '\n';
+    }
   }
 
   return Error::success();
@@ -535,8 +580,6 @@
   // regardless of the basic block layout. Note that if we insert/delete
   // instructions, we must take care to attach ORC info to the new/deleted ones.
   for (BinaryFunction &BF : llvm::make_second_range(BC.getBinaryFunctions())) {
-    if (!BF.hasORC())
-      continue;
 
     std::optional<ORCState> CurrentState;
     for (BinaryBasicBlock &BB : BF) {
@@ -549,8 +592,29 @@
           continue;
         }
 
-        if (!CurrentState)
-          continue;
+        // In case there was no ORC entry that matched the function start
+        // address, we need to propagate ORC state from the previous entry.
+        if (!CurrentState) {
+          // With no ORC entries at all there is nothing to propagate, and
+          // dereferencing the iterator below would be undefined behavior.
+          if (ORCEntries.empty())
+            continue;
+
+          auto It =
+              llvm::partition_point(ORCEntries, [&](const ORCListEntry &E) {
+                return E.IP < BF.getAddress();
+              });
+          if (It != ORCEntries.begin())
+            It = std::prev(It);
+
+          if (It->ORC == NullORC && BF.hasORC())
+            errs() << "BOLT-WARNING: ORC unwind info excludes prologue for "
+                   << BF << '\n';
+
+          CurrentState = It->ORC;
+          if (It->ORC != NullORC)
+            BF.setHasORC(true);
+        }
 
         // While printing ORC, attach info to every instruction for convenience.
         if (opts::PrintORC || &Inst == &BB.front())
diff --git a/bolt/test/X86/orc_unwind.s b/bolt/test/X86/orc_unwind.s
new file mode 100644
--- /dev/null
+++ b/bolt/test/X86/orc_unwind.s
@@ -0,0 +1,118 @@
+# REQUIRES: system-linux
+
+## Check that BOLT correctly reads ORC unwind information used by Linux Kernel.
+
+# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown %s -o %t.o
+# RUN: %clang %cflags %t.o -o %t.exe
+
+# RUN: llvm-bolt %t.exe --print-normalized --dump-orc --print-orc -o %t.out \
+# RUN:   2>&1 | FileCheck %s
+
+# CHECK:      BOLT-INFO: ORC unwind information:
+# CHECK-NEXT: {sp: 8, bp: 0, info: 0x5}: _start
+# CHECK-NEXT: {sp: 0, bp: 0, info: 0x0}: _start
+# CHECK-NEXT: {sp: 8, bp: 0, info: 0x5}: foo
+# CHECK-NEXT: {sp: 16, bp: -16, info: 0x15}: foo
+# CHECK-NEXT: {sp: 16, bp: -16, info: 0x14}: foo
+# CHECK-NEXT: {sp: 8, bp: 0, info: 0x5}: foo
+# CHECK-NEXT: {sp: 0, bp: 0, info: 0x0}: bar
+
+  .text
+  .globl _start
+  .type _start, %function
+_start:
+  .cfi_startproc
+
+  call foo
+# CHECK:      callq foo # ORC: {sp: 8, bp: 0, info: 0x5}
+  ret
+  .cfi_endproc
+  .size _start, .-_start
+
+  .globl foo
+  .type foo, %function
+foo:
+  .cfi_startproc
+  push %rbp
+# CHECK:      pushq %rbp # ORC: {sp: 8, bp: 0, info: 0x5}
+.L1:
+  mov %rsp, %rbp
+# CHECK:      movq %rsp, %rbp # ORC: {sp: 16, bp: -16, info: 0x15}
+.L2:
+  pop %rbp
+# CHECK:      popq %rbp # ORC: {sp: 16, bp: -16, info: 0x14}
+.L3:
+  ret
+# CHECK:      retq # ORC: {sp: 8, bp: 0, info: 0x5}
+  .cfi_endproc
+  .size foo, .-foo
+
+bar:
+  .cfi_startproc
+  ret
+# Same ORC info propagated from foo above.
+# CHECK:      retq # ORC: {sp: 8, bp: 0, info: 0x5}
+.L4:
+  .cfi_endproc
+  .size bar, .-bar
+
+  .section .orc_unwind,"a",@progbits
+  .align 4
+  .section .orc_unwind_ip,"a",@progbits
+  .align 4
+
+# ORC for _start
+  .section .orc_unwind
+  .2byte 8
+  .2byte 0
+  .2byte 5
+  .section .orc_unwind_ip
+  .long _start - .
+
+  .section .orc_unwind
+  .2byte 0
+  .2byte 0
+  .2byte 0
+  .section .orc_unwind_ip
+  .long foo - .
+
+# ORC for foo
+  .section .orc_unwind
+  .2byte 8
+  .2byte 0
+  .2byte 5
+  .section .orc_unwind_ip
+  .long foo - .
+
+  .section .orc_unwind
+  .2byte 16
+  .2byte -16
+  .2byte 21
+  .section .orc_unwind_ip
+  .long .L1 - .
+
+  .section .orc_unwind
+  .2byte 16
+  .2byte -16
+  .2byte 20
+  .section .orc_unwind_ip
+  .long .L2 - .
+
+  .section .orc_unwind
+  .2byte 8
+  .2byte 0
+  .2byte 5
+  .section .orc_unwind_ip
+  .long .L3 - .
+
+  .section .orc_unwind
+  .2byte 0
+  .2byte 0
+  .2byte 0
+  .section .orc_unwind_ip
+  .long .L4 - .
+
+# Fake Linux Kernel sections
+  .section __ksymtab,"a",@progbits
+  .section __ksymtab_gpl,"a",@progbits
+  .section .pci_fixup,"a",@progbits