diff --git a/bolt/include/bolt/Passes/SplitFunctions.h b/bolt/include/bolt/Passes/SplitFunctions.h --- a/bolt/include/bolt/Passes/SplitFunctions.h +++ b/bolt/include/bolt/Passes/SplitFunctions.h @@ -16,12 +16,25 @@ namespace llvm { namespace bolt { +/// Strategy used to partition blocks into fragments. +enum SplitFunctionsStrategy : char { + /// Split each function into a hot and cold fragment using profiling + /// information. + Profile2 = 0, + /// Split each function into a hot and cold fragment at a randomly chosen + /// split point (ignoring any available profiling information). + Random2, + /// Split all basic blocks of each function into fragments such that each + /// fragment contains exactly a single basic block. + All +}; + /// Split function code in multiple parts. class SplitFunctions : public BinaryFunctionPass { private: /// Split function body into fragments. - template - void splitFunction(BinaryFunction &Function, SplitStrategy Strategy = {}); + template + void splitFunction(BinaryFunction &Function, Strategy S = {}); /// Map basic block labels to their trampoline block labels. using TrampolineSetType = DenseMap; diff --git a/bolt/lib/Passes/SplitFunctions.cpp b/bolt/lib/Passes/SplitFunctions.cpp --- a/bolt/lib/Passes/SplitFunctions.cpp +++ b/bolt/lib/Passes/SplitFunctions.cpp @@ -12,10 +12,12 @@ #include "bolt/Passes/SplitFunctions.h" #include "bolt/Core/BinaryFunction.h" +#include "bolt/Core/FunctionLayout.h" #include "bolt/Core/ParallelUtilities.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/FormatVariadic.h" #include +#include #include #include @@ -66,7 +68,7 @@ static cl::opt SplitFunctions("split-functions", - cl::desc("split functions into hot and cold regions"), + cl::desc("split functions into fragments"), cl::cat(BoltOptCategory)); static cl::opt SplitThreshold( @@ -77,14 +79,25 @@ "increase after splitting."), cl::init(0), cl::Hidden, cl::cat(BoltOptCategory)); -static cl::opt - RandomSplit("split-random", - cl::desc("split functions randomly into hot/cold regions"), - cl::Hidden); +static cl::opt SplitStrategy( + "split-strategy", cl::init(SplitFunctionsStrategy::Profile2), + cl::values(clEnumValN(SplitFunctionsStrategy::Profile2, "profile2", + "split each function into a hot and cold fragment " + "using profiling information")), + cl::values(clEnumValN( + SplitFunctionsStrategy::Random2, "random2", + "split each function into a hot and cold fragment at a randomly chosen " + "split point (ignoring any available profiling information)")), + cl::values(clEnumValN( + SplitFunctionsStrategy::All, "all", + "split all basic blocks of each function into fragments such that each " + "fragment contains exactly a single basic block")), + cl::desc("strategy used to partition blocks into fragments"), + cl::cat(BoltOptCategory)); } // namespace opts namespace { -struct SplitCold { +struct SplitProfile2 { bool canSplit(const BinaryFunction &BF) { if (!BF.hasValidProfile()) return false; @@ -106,32 +119,25 @@ } template void partition(const It Start, const It End) const { - for (auto I = Start; I != End; ++I) { - BinaryBasicBlock *BB = *I; - if (!BB->canOutline()) - break; - BB->setIsCold(true); - } + std::for_each(Start, End, [](BinaryBasicBlock *const BB) { + assert(BB->canOutline() && + "Moving a block that is not outlineable to cold fragment"); + BB->setFragmentNum(FragmentNum::cold()); + }); } }; -struct SplitRandom { +struct SplitRandom2 { std::minstd_rand0 *Gen; - explicit SplitRandom(std::minstd_rand0 &Gen) : Gen(&Gen) {} + explicit SplitRandom2(std::minstd_rand0 &Gen) : Gen(&Gen) {} bool canSplit(const BinaryFunction &BF) { return true; } bool canOutline(const BinaryBasicBlock &BB) { return true; } template void partition(It Start, It End) const { - using DiffT = typename It::difference_type; - - const It OutlineableBegin = Start; - const It OutlineableEnd = - std::find_if(OutlineableBegin, End, [](const BinaryBasicBlock *BB) { - return !BB->canOutline(); - }); - const DiffT NumOutlineableBlocks = OutlineableEnd - OutlineableBegin; + using DiffT = typename std::iterator_traits::difference_type; + const DiffT NumOutlineableBlocks = End - Start; // We want to split at least one block unless there are not blocks that can // be outlined @@ -139,15 +145,27 @@ std::uniform_int_distribution Dist(MinimumSplit, NumOutlineableBlocks); const DiffT NumColdBlocks = Dist(*Gen); - const It ColdEnd = OutlineableBegin + NumColdBlocks; + std::for_each(End - NumColdBlocks, End, [](BinaryBasicBlock *BB) { + BB->setFragmentNum(FragmentNum::cold()); + }); LLVM_DEBUG(dbgs() << formatv("BOLT-DEBUG: randomly chose last {0} (out of " "{1} possible) blocks to split\n", - ColdEnd - OutlineableBegin, - OutlineableEnd - OutlineableBegin)); + NumColdBlocks, End - Start)); + } +}; + +struct SplitAll { + bool canSplit(const BinaryFunction &BF) { return true; } + bool canOutline(const BinaryBasicBlock &BB) { return true; } - std::for_each(OutlineableBegin, ColdEnd, - [](BinaryBasicBlock *BB) { BB->setIsCold(true); }); + template void partition(It Start, It End) const { + unsigned Fragment = 1; + std::for_each(Start, End, [&](BinaryBasicBlock *const BB) { + assert(BB->canOutline() && + "Moving a block that is not outlineable to cold fragment"); + BB->setFragmentNum(FragmentNum(Fragment++)); + }); } }; } // namespace @@ -167,23 +185,33 @@ if (!opts::SplitFunctions) return; - ParallelUtilities::WorkFuncTy WorkFun; std::minstd_rand0 RandGen(opts::RandomSeed.getValue()); - if (opts::RandomSplit) + + ParallelUtilities::WorkFuncTy WorkFun; + bool ForceSequential = false; + + switch (opts::SplitStrategy) { + case SplitFunctionsStrategy::Profile2: + WorkFun = [&](BinaryFunction &BF) { splitFunction(BF); }; + break; + case SplitFunctionsStrategy::Random2: WorkFun = [&](BinaryFunction &BF) { - splitFunction(BF, SplitRandom(RandGen)); + splitFunction(BF, SplitRandom2(RandGen)); }; - else - WorkFun = [&](BinaryFunction &BF) { splitFunction(BF); }; + // If we split functions randomly, we need to ensure that across runs with + // the same input, we generate random numbers for each function in the same + // order. + ForceSequential = true; + break; + case SplitFunctionsStrategy::All: + WorkFun = [&](BinaryFunction &BF) { splitFunction(BF); }; + break; + } ParallelUtilities::PredicateTy SkipFunc = [&](const BinaryFunction &BF) { return !shouldOptimize(BF); }; - // If we split functions randomly, we need to ensure that across runs with the - // same input, we generate random numbers for each function in the same order. - const bool ForceSequential = opts::RandomSplit; - ParallelUtilities::runOnEachFunction( BC, ParallelUtilities::SchedulingPolicy::SP_BB_LINEAR, WorkFun, SkipFunc, "SplitFunctions", ForceSequential); @@ -195,12 +223,12 @@ 100.0 * SplitBytesHot / (SplitBytesHot + SplitBytesCold)); } -template -void SplitFunctions::splitFunction(BinaryFunction &BF, SplitStrategy Strategy) { +template +void SplitFunctions::splitFunction(BinaryFunction &BF, Strategy S) { if (BF.empty()) return; - if (!Strategy.canSplit(BF)) + if (!S.canSplit(BF)) return; FunctionLayout &Layout = BF.getLayout(); @@ -226,7 +254,7 @@ for (BinaryBasicBlock *const BB : NewLayout) { if (!BB->canOutline()) continue; - if (!Strategy.canOutline(*BB)) { + if (!S.canOutline(*BB)) { BB->setCanOutline(false); continue; } @@ -278,8 +306,16 @@ }); } - // Separate hot from cold starting from the bottom. - Strategy.partition(NewLayout.rbegin(), NewLayout.rend()); + // Identify the last block that must not be split into a fragment. Every block + // after this block can be split. Note that when the iterator points to the + // block that cannot be outlined, then reverse_iterator::base() points to the + // block after it. + const BinaryFunction::BasicBlockOrderType::reverse_iterator FirstOutlineable = + llvm::find_if(reverse(NewLayout), [](const BinaryBasicBlock *const BB) { + return !BB->canOutline(); + }); + + S.partition(FirstOutlineable.base(), NewLayout.end()); BF.getLayout().update(NewLayout); // For shared objects, invoke instructions and corresponding landing pads diff --git a/bolt/test/X86/split-all.s b/bolt/test/X86/split-all.s new file mode 100644 --- /dev/null +++ b/bolt/test/X86/split-all.s @@ -0,0 +1,84 @@ +# Test split all block strategy + +# RUN: llvm-mc --filetype=obj --triple x86_64-unknown-unknown %s -o %t.o +# RUN: %clang %cflags %t.o -o %t.exe -Wl,-q +# RUN: llvm-bolt %t.exe -o %t.bolt --split-functions --split-strategy=all \ +# RUN: --print-split --print-only=chain \ +# RUN: 2>&1 | FileCheck %s + +# CHECK: Binary Function "chain" +# CHECK: IsSplit : +# CHECK-SAME: {{ 1$}} +# CHECK: {{^\.LBB00}} +# CHECK: ------- HOT-COLD SPLIT POINT ------- +# CHECK: {{^\.LFT0}} +# CHECK: ------- HOT-COLD SPLIT POINT ------- +# CHECK: {{^\.Ltmp0}} +# CHECK: ------- HOT-COLD SPLIT POINT ------- +# CHECK: {{^\.Ltmp2}} +# CHECK: ------- HOT-COLD SPLIT POINT ------- +# CHECK: {{^\.Ltmp3}} +# CHECK: ------- HOT-COLD SPLIT POINT ------- +# CHECK: {{^\.Ltmp4}} +# CHECK: ------- HOT-COLD SPLIT POINT ------- +# CHECK: {{^\.Ltmp5}} +# CHECK: ------- HOT-COLD SPLIT POINT ------- +# CHECK: {{^\.Ltmp1}} +# CHECK: End of Function "chain" + + .text + .globl chain + .type chain, @function +chain: +.Lchain_entry: + pushq %rbp + movq %rsp, %rbp + cmpl $2, %edi + jge .Lchain_start +.Lfast: + movl $5, %eax + jmp .Lexit +.Lchain_start: + movl $10, %eax + jmp .Lchain1 +.Lchain1: + addl $1, %eax + jmp .Lchain2 +.Lchain2: + addl $1, %eax + jmp .Lchain3 +.Lchain3: + addl $1, %eax + addl $1, %eax + addl $1, %eax + addl $1, %eax + addl $1, %eax + jmp .Lchain4 +.Lchain4: + addl $1, %eax + addl $1, %eax + addl $1, %eax + addl $1, %eax + addl $1, %eax + jmp .Lexit +.Lexit: + popq %rbp + ret +.Lchain_end: + .size chain, .Lchain_end-chain + + + .globl main + .type main, @function +main: + pushq %rbp + movq %rsp, %rbp + movl $1, %edi + call chain + movl $4, %edi + call chain + xorl %eax, %eax + popq %rbp + retq +.Lmain_end: + .size main, .Lmain_end-main diff --git a/bolt/test/X86/split-random.s b/bolt/test/X86/split-random.s --- a/bolt/test/X86/split-random.s +++ b/bolt/test/X86/split-random.s @@ -1,8 +1,8 @@ # Test random function splitting option # RUN: llvm-mc --filetype=obj --triple x86_64-unknown-unknown %s -o %t.o -# RUN: %clang %cflags %t.o -o %t.exe -# RUN: llvm-bolt %t.exe -o %t.out --split-functions --split-random \ +# RUN: %clang %cflags %t.o -o %t.exe -Wl,-q +# RUN: llvm-bolt %t.exe -o %t.out --split-functions --split-strategy=random2 \ # RUN: --print-finalized --print-only=two_block --bolt-seed=7 2>&1 | \ # RUN: FileCheck %s