diff --git a/bolt/README.md b/bolt/README.md --- a/bolt/README.md +++ b/bolt/README.md @@ -180,7 +180,7 @@ BOLT. Assuming your environment is setup to include the right path, execute `llvm-bolt`: ``` -$ llvm-bolt -o .bolt -data=perf.fdata -reorder-blocks=ext-tsp -reorder-functions=hfsort -split-functions=2 -split-all-cold -split-eh -dyno-stats +$ llvm-bolt -o .bolt -data=perf.fdata -reorder-blocks=ext-tsp -reorder-functions=hfsort -split-functions -split-all-cold -split-eh -dyno-stats ``` If you do need an updated debug info, then add `-update-debug-sections` option diff --git a/bolt/docs/OptimizingClang.md b/bolt/docs/OptimizingClang.md --- a/bolt/docs/OptimizingClang.md +++ b/bolt/docs/OptimizingClang.md @@ -64,7 +64,7 @@ the generated profile: ```bash $ llvm-bolt $CPATH/clang-7 -o $CPATH/clang-7.bolt -b clang-7.yaml \ - -reorder-blocks=ext-tsp -reorder-functions=hfsort+ -split-functions=3 \ + -reorder-blocks=ext-tsp -reorder-functions=hfsort+ -split-functions \ -split-all-cold -dyno-stats -icf=1 -use-gnu-stack ``` The output will look similar to the one below: diff --git a/bolt/include/bolt/Passes/SplitFunctions.h b/bolt/include/bolt/Passes/SplitFunctions.h --- a/bolt/include/bolt/Passes/SplitFunctions.h +++ b/bolt/include/bolt/Passes/SplitFunctions.h @@ -18,15 +18,6 @@ /// Split function code in multiple parts. class SplitFunctions : public BinaryFunctionPass { -public: - /// Settings for splitting function bodies into hot/cold partitions. - enum SplittingType : char { - ST_NONE = 0, /// Do not split functions. - ST_LARGE, /// In non-relocation mode, only split functions that - /// are too large to fit into the original space. - ST_ALL, /// Split all functions. - }; - private: /// Split function body into fragments. 
void splitFunction(BinaryFunction &Function); diff --git a/bolt/lib/Passes/SplitFunctions.cpp b/bolt/lib/Passes/SplitFunctions.cpp --- a/bolt/lib/Passes/SplitFunctions.cpp +++ b/bolt/lib/Passes/SplitFunctions.cpp @@ -14,6 +14,7 @@ #include "bolt/Core/BinaryFunction.h" #include "bolt/Core/ParallelUtilities.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/FormatVariadic.h" #include @@ -22,6 +23,25 @@ using namespace llvm; using namespace bolt; +namespace { +class DeprecatedSplitFunctionOptionParser : public cl::parser<bool> { +public: + explicit DeprecatedSplitFunctionOptionParser(cl::Option &O) + : cl::parser<bool>(O) {} + + bool parse(cl::Option &O, StringRef ArgName, StringRef Arg, bool &Value) { + if (Arg == "2" || Arg == "3") { + Value = true; + errs() << formatv("BOLT-WARNING: specifying non-boolean value \"{0}\" " + "for option -{1} is deprecated\n", + Arg, ArgName); + return false; + } + return cl::parser<bool>::parse(O, ArgName, Arg, Value); + } +}; +} // namespace + namespace opts { extern cl::OptionCategory BoltOptCategory; @@ -42,21 +62,10 @@ cl::Hidden, cl::cat(BoltOptCategory)); -static cl::opt<SplitFunctions::SplittingType> -SplitFunctions("split-functions", - cl::desc("split functions into hot and cold regions"), - cl::init(SplitFunctions::ST_NONE), - cl::values(clEnumValN(SplitFunctions::ST_NONE, "0", - "do not split any function"), - clEnumValN(SplitFunctions::ST_LARGE, "1", - "in non-relocation mode only split functions too large " - "to fit into original code space"), - clEnumValN(SplitFunctions::ST_LARGE, "2", - "same as 1 (backwards compatibility)"), - clEnumValN(SplitFunctions::ST_ALL, "3", - "split all functions")), - cl::ZeroOrMore, - cl::cat(BoltOptCategory)); +static cl::opt<bool, false, DeprecatedSplitFunctionOptionParser> + SplitFunctions("split-functions", + cl::desc("split functions into hot and cold regions"), + cl::cat(BoltOptCategory)); static cl::opt SplitThreshold( "split-threshold", @@ -66,11 +75,6 @@ "increase after splitting."), cl::init(0), cl::Hidden, cl::cat(BoltOptCategory)); -void 
syncOptions(BinaryContext &BC) { - if (!BC.HasRelocations && opts::SplitFunctions == SplitFunctions::ST_LARGE) - opts::SplitFunctions = SplitFunctions::ST_ALL; -} - } // namespace opts namespace llvm { @@ -85,9 +89,7 @@ } void SplitFunctions::runOnFunctions(BinaryContext &BC) { - opts::syncOptions(BC); - - if (opts::SplitFunctions == SplitFunctions::ST_NONE) + if (!opts::SplitFunctions) return; ParallelUtilities::WorkFuncTy WorkFun = [&](BinaryFunction &BF) { @@ -140,12 +142,6 @@ << " pre-split is <0x" << Twine::utohexstr(OriginalHotSize) << ", 0x" << Twine::utohexstr(ColdSize) << ">\n"); - if (opts::SplitFunctions == SplitFunctions::ST_LARGE && - !BC.HasRelocations) { - // Split only if the function wouldn't fit. - if (OriginalHotSize <= BF.getMaxSize()) - return; - } } // Never outline the first basic block. diff --git a/bolt/test/X86/bug-reorder-bb-jrcxz.s b/bolt/test/X86/bug-reorder-bb-jrcxz.s --- a/bolt/test/X86/bug-reorder-bb-jrcxz.s +++ b/bolt/test/X86/bug-reorder-bb-jrcxz.s @@ -16,7 +16,7 @@ # RUN: llvm-bolt %t.exe -o %t.bolted --data %t.fdata \ # RUN: --reorder-blocks=ext-tsp --reorder-functions=hfsort \ -# RUN: --split-functions=2 --split-all-cold --split-eh --dyno-stats \ +# RUN: --split-functions --split-all-cold --split-eh --dyno-stats \ # RUN: --print-finalized 2>&1 | FileCheck %s # CHECK-NOT: value of -2105 is too large for field of 1 byte. 
diff --git a/bolt/test/X86/jump-table-icp.test b/bolt/test/X86/jump-table-icp.test --- a/bolt/test/X86/jump-table-icp.test +++ b/bolt/test/X86/jump-table-icp.test @@ -5,7 +5,7 @@ RUN: %clang %cflags -no-pie %t.o -o %t.exe -Wl,-q RUN: (llvm-bolt %t.exe --data %t.fdata -o %t --relocs \ -RUN: --reorder-blocks=cache --split-functions=3 --split-all-cold \ +RUN: --reorder-blocks=cache --split-functions --split-all-cold \ RUN: --use-gnu-stack --dyno-stats --indirect-call-promotion=jump-tables \ RUN: --print-icp -v=0 \ RUN: --icp-jt-remaining-percent-threshold=10 \ diff --git a/bolt/test/X86/shared_object_entry.s b/bolt/test/X86/shared_object_entry.s --- a/bolt/test/X86/shared_object_entry.s +++ b/bolt/test/X86/shared_object_entry.s @@ -1,7 +1,7 @@ # RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown %s -o %t.o # RUN: ld.lld %t.o -o %t.so --shared --entry=func1.cold.1 --emit-relocs # RUN: llvm-bolt -relocs %t.so -o %t -reorder-functions=hfsort+ \ -# RUN: -split-functions=3 -reorder-blocks=ext-tsp -split-all-cold \ +# RUN: -split-functions -reorder-blocks=ext-tsp -split-all-cold \ # RUN: -dyno-stats -icf=1 -use-gnu-stack # Check that an entry point is a cold symbol diff --git a/bolt/test/X86/unreachable.test b/bolt/test/X86/unreachable.test --- a/bolt/test/X86/unreachable.test +++ b/bolt/test/X86/unreachable.test @@ -5,7 +5,7 @@ RUN: %p/Inputs/unreachable.s -o %t.o RUN: %clangxx %cxxflags -no-pie %t.o -o %t.exe %t.so RUN: llvm-bolt %t.exe -o %t \ -RUN: -reorder-blocks=none -split-functions=1 -eliminate-unreachable \ +RUN: -reorder-blocks=none -split-functions -eliminate-unreachable \ RUN: -funcs=foo -use-gnu-stack -print-cfg -print-finalized \ RUN: | FileCheck %s --check-prefix=BOLT RUN: llvm-objdump -d %t --print-imm-hex --disassemble-symbols=foo \ diff --git a/bolt/test/runtime/X86/exceptions-instrumentation.test b/bolt/test/runtime/X86/exceptions-instrumentation.test --- a/bolt/test/runtime/X86/exceptions-instrumentation.test +++ 
b/bolt/test/runtime/X86/exceptions-instrumentation.test @@ -9,7 +9,7 @@ RUN: llvm-bolt %t_exc_split -o %t.exc.bolted --data %t.fdata \ RUN: --reorder-blocks=ext-tsp --reorder-functions=hfsort+ \ -RUN: --split-functions=3 --split-eh=1 \ +RUN: --split-functions --split-eh=1 \ RUN: | FileCheck --check-prefix=EXCEPTIONS %s EXCEPTIONS-NOT: invalid (possibly stale) profile diff --git a/bolt/test/runtime/X86/pie-exceptions-split.test b/bolt/test/runtime/X86/pie-exceptions-split.test --- a/bolt/test/runtime/X86/pie-exceptions-split.test +++ b/bolt/test/runtime/X86/pie-exceptions-split.test @@ -10,7 +10,7 @@ RUN: %t.instr RUN: llvm-bolt %t -o %t.bolt --data %t.fdata --reorder-blocks=ext-tsp \ -RUN: --split-functions=1 --split-eh --print-after-lowering \ +RUN: --split-functions --split-eh --print-after-lowering \ RUN: --print-only=main 2>&1 | FileCheck %s ## All calls to printf() should be from exception handling code that was @@ -26,4 +26,3 @@ CHECK-BOLTED: catch 2 CHECK-BOLTED-NEXT: catch 1 - diff --git a/bolt/test/runtime/meta-merge-fdata.test b/bolt/test/runtime/meta-merge-fdata.test --- a/bolt/test/runtime/meta-merge-fdata.test +++ b/bolt/test/runtime/meta-merge-fdata.test @@ -22,7 +22,7 @@ # Check that BOLT works with this profile RUN: llvm-bolt merge-fdata -o %t.bolt --data %t.fdata1 \ RUN: --reorder-blocks=ext-tsp --reorder-functions=hfsort+ \ -RUN: --split-functions=3 \ +RUN: --split-functions \ RUN: | FileCheck %s --check-prefix=CHECK-BOLT1 CHECK-BOLT1-NOT: invalid (possibly stale) profile @@ -44,7 +44,7 @@ # Optimize using merged fdata RUN: llvm-bolt merge-fdata -o %t.opt --data %t.fdata.base \ RUN: --reorder-blocks=ext-tsp --reorder-functions=hfsort+ \ -RUN: --split-functions=3 \ +RUN: --split-functions \ RUN: | FileCheck %s --check-prefix=CHECK-BOLT2 CHECK-BOLT2-NOT: invalid (possibly stale) profile