Index: bolt/include/bolt/Core/BinaryFunction.h =================================================================== --- bolt/include/bolt/Core/BinaryFunction.h +++ bolt/include/bolt/Core/BinaryFunction.h @@ -373,7 +373,8 @@ BinaryFunction *FoldedIntoFunction{nullptr}; /// All fragments for a parent function. - SmallPtrSet Fragments; + using FragmentsSetTy = SmallPtrSet; + FragmentsSetTy Fragments; /// The profile data for the number of times the function was executed. uint64_t ExecutionCount{COUNT_NO_PROFILE}; @@ -1779,6 +1780,17 @@ return llvm::is_contained(Fragments, &Other); } + /// Return the child fragment form parent function + iterator_range getFragments() const { + return iterator_range(Fragments.begin(), + Fragments.end()); + } + + /// Return the parent function for split function fragments. + FragmentsSetTy *getParentFragments() { + return &ParentFragments; + } + /// Returns if this function is a parent or child of \p Other function. bool isParentOrChildOf(const BinaryFunction &Other) const { return isChildOf(Other) || isParentOf(Other); Index: bolt/lib/Passes/RegReAssign.cpp =================================================================== --- bolt/lib/Passes/RegReAssign.cpp +++ bolt/lib/Passes/RegReAssign.cpp @@ -140,7 +140,7 @@ std::fill(RegScore.begin(), RegScore.end(), 0); std::fill(RankedRegs.begin(), RankedRegs.end(), 0); - for (BinaryBasicBlock &BB : Function) { + auto countRegScore = [&](BinaryBasicBlock &BB) { for (MCInst &Inst : BB) { const bool CannotUseREX = BC.MIB->cannotUseREX(Inst); const MCInstrDesc &Desc = BC.MII->get(Inst.getOpcode()); @@ -191,7 +191,16 @@ RegScore[RegEC] += BB.getKnownExecutionCount(); } } + }; + for (BinaryBasicBlock &BB : Function) { + countRegScore(BB); + } + for (BinaryFunction *ChildFrag : Function.getFragments()) { + for (BinaryBasicBlock &BB : *ChildFrag) { + countRegScore(BB); + } } + std::iota(RankedRegs.begin(), RankedRegs.end(), 0); // 0, 1, 2, 3... llvm::sort(RankedRegs, [&](size_t A, size_t B) { return RegScore[A] > RegScore[B]; }); @@ -213,6 +222,17 @@ BinaryContext &BC = Function.getBinaryContext(); rankRegisters(Function); + // If there is a situation where function: + // A() -> A.cold() + // A.localalias() -> A.cold() + // simply swapping these two calls can cause issues. + for (BinaryFunction *ChildFrag : Function.getFragments()) { + if (ChildFrag->getParentFragments()->size() > 1) return; + if (ChildFrag->empty()) { + return; + } + } + // Bail early if our registers are all black listed, before running expensive // analysis passes bool Bail = true; @@ -304,6 +324,10 @@ << " with " << BC.MRI->getName(ExtReg) << "\n\n"); swap(Function, ClassicReg, ExtReg); FuncsChanged.insert(&Function); + for (BinaryFunction *ChildFrag : Function.getFragments()) { + swap(*ChildFrag, ClassicReg, ExtReg); + FuncsChanged.insert(ChildFrag); + } ++Begin; if (Begin == End) break; @@ -315,6 +339,13 @@ BinaryContext &BC = Function.getBinaryContext(); rankRegisters(Function); + for (BinaryFunction *ChildFrag : Function.getFragments()) { + if (ChildFrag->getParentFragments()->size() > 1) return false; + if (ChildFrag->empty()) { + return false; + } + } + // Try swapping R12, R13, R14 or R15 with RBX (we work with all callee-saved // regs except RBP) MCPhysReg Candidate = 0; @@ -345,6 +376,10 @@ (void)BC; swap(Function, RBX, Candidate); FuncsChanged.insert(&Function); + for (BinaryFunction *ChildFrag : Function.getFragments()) { + swap(*ChildFrag, RBX, Candidate); + FuncsChanged.insert(ChildFrag); + } return true; } @@ -404,7 +439,7 @@ for (auto &I : BC.getBinaryFunctions()) { BinaryFunction &Function = I.second; - if (!Function.isSimple() || Function.isIgnored()) + if (!Function.isSimple() || Function.isIgnored() || Function.isFragment()) continue; LLVM_DEBUG(dbgs() << "====================================\n"); Index: bolt/test/X86/reg-reassign-swap-cold.s =================================================================== --- /dev/null +++ bolt/test/X86/reg-reassign-swap-cold.s @@ -0,0 +1,53 @@ +# This test case reproduces a bug where, during register swapping, +# the code fragments associated with the function need to be swapped +# together (which may be generated during PGO optimization). If not +# handled properly, optimized binary execution can result in a segmentation fault. + +# REQUIRES: system-linux + +# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown %s -o %t.o +# RUN: link_fdata %s %t.o %t.fdata +# RUN: %clang -no-pie %t.o -o %t.exe -Wl,-q +# RUN: llvm-bolt %t.exe -o %t.out -data=%t.fdata --reg-reassign | FileCheck %s +# RUN: %t.out + +# CHECK: BOLT-INFO: Reg Reassignment Pass Stats +# CHECK-NEXT: 2 functions affected. + .text + .globl main # -- Begin function main + .p2align 4, 0x90 + .type main,@function + .type main.cold,@function +main.cold: +.L1: + mov $0x3, %rbx + jmp .L3 +main: # @main + .cfi_startproc +# %bb.0: # %entry + pushq %rax + pushq %r12 + pushq %rbx + .cfi_def_cfa_offset 16 + mov $0x1, %r12 + mov $0x2, %rbx +.L2: + add $0x1, %r12 + shr $0x14, %r12 + jmp .L1 +.L3: + cmp $0x3, %r12 + je 0x0 + xorl %eax, %eax + popq %rcx + popq %rbx + popq %r12 + .cfi_def_cfa_offset 8 + retq +# FDATA: 1 main.cold/1 7 1 main 0 0 100 +# FDATA: 1 main 20 1 main 26 0 100 +# FDATA: 1 main 1a 1 main.cold/1 0 0 100 + +.Lfunc_end0: + .size main, .Lfunc_end0-main + .cfi_endproc