diff --git a/llvm/include/llvm/Transforms/IPO/HotColdSplitting.h b/llvm/include/llvm/Transforms/IPO/HotColdSplitting.h --- a/llvm/include/llvm/Transforms/IPO/HotColdSplitting.h +++ b/llvm/include/llvm/Transforms/IPO/HotColdSplitting.h @@ -12,7 +12,9 @@ #ifndef LLVM_TRANSFORMS_IPO_HOTCOLDSPLITTING_H #define LLVM_TRANSFORMS_IPO_HOTCOLDSPLITTING_H +#include "llvm/ADT/StringSet.h" #include "llvm/IR/PassManager.h" +#include "llvm/Support/SpecialCaseList.h" namespace llvm { @@ -37,11 +39,13 @@ function_ref GTTI, std::function *GORE, function_ref LAC) - : PSI(ProfSI), GetBFI(GBFI), GetTTI(GTTI), GetORE(GORE), LookupAC(LAC) {} + : PSI(ProfSI), GetBFI(GBFI), GetTTI(GTTI), GetORE(GORE), LookupAC(LAC), + FileMarkedColdFunctions(nullptr) {} bool run(Module &M); private: bool isFunctionCold(const Function &F) const; + bool isFunctionInColdList(const Function &F) const; bool shouldOutlineFrom(const Function &F) const; bool outlineColdRegions(Function &F, bool HasProfileSummary); Function *extractColdRegion(const BlockSequence &Region, @@ -55,6 +59,8 @@ function_ref GetTTI; std::function *GetORE; function_ref LookupAC; + StringSet<> CmdMarkedColdFunctions; + std::unique_ptr FileMarkedColdFunctions; }; /// Pass to outline cold regions. 
@@ -66,4 +72,3 @@ } // end namespace llvm #endif // LLVM_TRANSFORMS_IPO_HOTCOLDSPLITTING_H - diff --git a/llvm/lib/Transforms/IPO/HotColdSplitting.cpp b/llvm/lib/Transforms/IPO/HotColdSplitting.cpp --- a/llvm/lib/Transforms/IPO/HotColdSplitting.cpp +++ b/llvm/lib/Transforms/IPO/HotColdSplitting.cpp @@ -29,6 +29,7 @@ #include "llvm/ADT/PostOrderIterator.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" +#include "llvm/ADT/StringSet.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/BlockFrequencyInfo.h" #include "llvm/Analysis/BranchProbabilityInfo.h" @@ -59,6 +60,8 @@ #include "llvm/Support/BranchProbability.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/SpecialCaseList.h" +#include "llvm/Support/VirtualFileSystem.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/IPO.h" #include "llvm/Transforms/Scalar.h" @@ -69,6 +72,9 @@ #include "llvm/Transforms/Utils/ValueMapper.h" #include #include +#include +#include +#include #define DEBUG_TYPE "hotcoldsplit" @@ -85,6 +91,17 @@ cl::desc("Base penalty for splitting cold code (as a " "multiple of TCC_Basic)")); +static cl::opt + ColdFunctionsList("cold-functions-list", cl::init(""), cl::Hidden, + cl::desc("Comma-separated list of functions to mark" + " as cold during hot/cold splitting.")); + +static cl::opt + ColdFunctionsFile("cold-functions-file", cl::init(""), cl::Hidden, + cl::desc("File name containing a newline-separated list" + " of function names to mark as cold during" + " hot/cold splitting.")); + namespace { // Same as blockEndsInUnreachable in CodeGen/BranchFolding.cpp. Do not modify // this function unless you modify the MBB version as well. @@ -191,7 +208,29 @@ } // end anonymous namespace -/// Check whether \p F is inherently cold. +// Check whether \p F is in list of user-supplied cold functions. 
+bool HotColdSplitting::isFunctionInColdList(const Function &F) const {
+  // Names given via -cold-functions-list are matched exactly against the
+  // function's name.
+  StringRef Name = F.getName();
+  if (CmdMarkedColdFunctions.count(Name)) {
+    LLVM_DEBUG(dbgs() << "isFunctionCold: " << Name << " is cold "
+                      << " via command line info.\n");
+    return true;
+  }
+
+  // The file-backed list is null unless -cold-functions-file was given.
+  if (FileMarkedColdFunctions &&
+      FileMarkedColdFunctions->inSection("", "", Name)) {
+    LLVM_DEBUG(dbgs() << "isFunctionCold: " << Name << " is cold "
+                      << " via file info.\n");
+    return true;
+  }
+
+  return false;
+}
+
+/// Check whether \p F is inherently cold.
 bool HotColdSplitting::isFunctionCold(const Function &F) const {
   if (F.hasFnAttribute(Attribute::Cold))
     return true;
@@ -656,9 +695,44 @@
 bool HotColdSplitting::run(Module &M) {
   bool Changed = false;
   bool HasProfileSummary = (M.getProfileSummary(/* IsCS */ false) != nullptr);
+
+  // Read in user-defined cold function names, if any.
+  if (!ColdFunctionsList.empty()) {
+    LLVM_DEBUG(dbgs() << "Reading in cold functions from command line.\n");
+    // Skip empty entries (e.g. after a trailing comma): an empty name would
+    // otherwise match every unnamed function in the module.
+    SmallVector<StringRef, 8> CFNames;
+    StringRef(ColdFunctionsList.getValue())
+        .split(CFNames, ',', /*MaxSplit=*/-1, /*KeepEmpty=*/false);
+    for (StringRef CFName : CFNames) {
+      LLVM_DEBUG(dbgs() << " Function " << CFName
+                        << " listed as cold from command line.\n");
+      CmdMarkedColdFunctions.insert(CFName);
+    }
+  }
+
+  // Read in user-defined cold function names supplied by a file.
+  if (!ColdFunctionsFile.empty()) {
+    // Use the SpecialCaseList helper to read in the cold functions file.
+    LLVM_DEBUG(dbgs() << "Reading in functions from file "
+                      << ColdFunctionsFile << "\n");
+    std::unique_ptr<vfs::FileSystem> FS = vfs::createPhysicalFileSystem();
+    FileMarkedColdFunctions =
+        SpecialCaseList::createOrDie({ColdFunctionsFile}, *FS);
+  }
+
   for (auto It = M.begin(), End = M.end(); It != End; ++It) {
     Function &F = *It;
 
+    // Mark every function named in the user-supplied cold lists (if any) as
+    // cold. This is deliberately done before the declaration check below and
+    // kept separate from isFunctionCold(): the listed names may refer to
+    // declarations (e.g. external runtime functions), which the rest of this
+    // loop skips but which should still receive the cold attribute.
+    // NOTE(review): `optnone` functions named in the lists are marked too.
+    if (isFunctionInColdList(F))
+      Changed |= markFunctionCold(F);
+
     // Do not touch declarations.
     if (F.isDeclaration())
       continue;
@@ -672,6 +746,18 @@
       Changed |= markFunctionCold(F);
       continue;
     }
+  }
+
+  for (auto It = M.begin(), End = M.end(); It != End; ++It) {
+    Function &F = *It;
+
+    // Do not touch declarations.
+    if (F.isDeclaration())
+      continue;
+
+    // Do not modify `optnone` functions.
+    if (F.hasOptNone())
+      continue;
 
     if (!shouldOutlineFrom(F)) {
       LLVM_DEBUG(llvm::dbgs() << "Skipping " << F.getName() << "\n");
diff --git a/llvm/test/Transforms/HotColdSplit/custom-cold-cmd.ll b/llvm/test/Transforms/HotColdSplit/custom-cold-cmd.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/HotColdSplit/custom-cold-cmd.ll
@@ -0,0 +1,122 @@
+; RUN: opt -S -hotcoldsplit -cold-functions-list=__cxa_guard_acquire,__cxa_guard_release,__cxa_guard_abort < %s 2>&1 | FileCheck %s
+; RUN: echo -e ":__cxa_guard_acquire\n:__cxa_guard_release\n:__cxa_guard_abort\n" > %t.coldfuncs && opt -S -hotcoldsplit -cold-functions-file=%t.coldfuncs < %s 2>&1 | FileCheck %s
+%struct.foo = type { i8 }
+
+@_ZZ2govE1f = internal global %struct.foo zeroinitializer, align 1
+@_ZGVZ2govE1f = internal global i64 0, align 8
+@__dso_handle = external hidden global i8
+@_ZZ8go_leakyvE1f = internal global %struct.foo* null, align 8
+@_ZGVZ8go_leakyvE1f = internal global i64 0, align 8
+
+; Since __cxa_guard_acquire/release/abort functions are marked
+; as
cold via command-line input, they should all share the same attribute. +; CHECK: declare {{.*}}@__cxa_guard_acquire{{.*}} [[cold_attr:#[0-9]+]] +; CHECK: declare {{.*}}@__cxa_guard_release{{.*}} [[cold_attr]] +; CHECK: declare {{.*}}@__cxa_guard_abort{{.*}} [[cold_attr]] +; CHECK: define internal void @_Z2gov.cold.1 +; CHECK: define internal void @_Z8go_leakyv.cold.1(i8* %call) +; CHECK: attributes [[cold_attr]] = { {{.*}}cold{{.*}} } + +define dso_local void @_Z2gov() #0 { +entry: + %0 = load atomic i8, i8* bitcast (i64* @_ZGVZ2govE1f to i8*) acquire, align 8 + %guard.uninitialized = icmp eq i8 %0, 0 + br i1 %guard.uninitialized, label %init.check, label %init.end, !prof !1 + +init.check: ; preds = %entry + %1 = call i32 @__cxa_guard_acquire(i64* @_ZGVZ2govE1f) #1 + %tobool = icmp ne i32 %1, 0 + br i1 %tobool, label %init, label %init.end + +init: ; preds = %init.check + call void @_ZN3fooC1Ev(%struct.foo* @_ZZ2govE1f) #1 + %2 = call i32 @__cxa_atexit(void (i8*)* bitcast (void (%struct.foo*)* @_ZN3fooD1Ev to void (i8*)*), i8* getelementptr inbounds (%struct.foo, %struct.foo* @_ZZ2govE1f, i32 0, i32 0), i8* @__dso_handle) #1 + call void @__cxa_guard_release(i64* @_ZGVZ2govE1f) #1 + br label %init.end + +init.end: ; preds = %init, %init.check, %entry + ret void +} + +declare dso_local i32 @__cxa_guard_acquire(i64*) #1 + +declare extern_weak dso_local void @_ZN3fooC1Ev(%struct.foo*) unnamed_addr #2 + +declare extern_weak dso_local void @_ZN3fooD1Ev(%struct.foo*) unnamed_addr #2 + +declare dso_local i32 @__cxa_atexit(void (i8*)*, i8*, i8*) #1 + +declare dso_local void @__cxa_guard_release(i64*) #1 + +define dso_local void @_Z8go_leakyv() #3 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { +entry: + %exn.slot = alloca i8*, align 8 + %ehselector.slot = alloca i32, align 4 + %0 = load atomic i8, i8* bitcast (i64* @_ZGVZ8go_leakyvE1f to i8*) acquire, align 8 + %guard.uninitialized = icmp eq i8 %0, 0 + br i1 %guard.uninitialized, label %init.check, label 
%init.end, !prof !1 + +init.check: ; preds = %entry + %1 = call i32 @__cxa_guard_acquire(i64* @_ZGVZ8go_leakyvE1f) #1 + %tobool = icmp ne i32 %1, 0 + br i1 %tobool, label %init, label %init.end + +init: ; preds = %init.check + %call = invoke noalias nonnull i8* @_Znwm(i64 1) #6 + to label %invoke.cont unwind label %lpad + +invoke.cont: ; preds = %init + %2 = bitcast i8* %call to %struct.foo* + call void @_ZN3fooC1Ev(%struct.foo* %2) #1 + store %struct.foo* %2, %struct.foo** @_ZZ8go_leakyvE1f, align 8 + call void @__cxa_guard_release(i64* @_ZGVZ8go_leakyvE1f) #1 + br label %init.end + +init.end: ; preds = %invoke.cont, %init.check, %entry + ret void + +lpad: ; preds = %init + %3 = landingpad { i8*, i32 } + cleanup + %4 = extractvalue { i8*, i32 } %3, 0 + store i8* %4, i8** %exn.slot, align 8 + %5 = extractvalue { i8*, i32 } %3, 1 + store i32 %5, i32* %ehselector.slot, align 4 + call void @__cxa_guard_abort(i64* @_ZGVZ8go_leakyvE1f) #1 + br label %eh.resume + +eh.resume: ; preds = %lpad + %exn = load i8*, i8** %exn.slot, align 8 + %sel = load i32, i32* %ehselector.slot, align 4 + %lpad.val = insertvalue { i8*, i32 } undef, i8* %exn, 0 + %lpad.val1 = insertvalue { i8*, i32 } %lpad.val, i32 %sel, 1 + resume { i8*, i32 } %lpad.val1 +} + +; Function Attrs: nobuiltin allocsize(0) +declare dso_local nonnull i8* @_Znwm(i64) #4 + +declare dso_local i32 @__gxx_personality_v0(...) + +; Function Attrs: nounwind +declare dso_local void @__cxa_guard_abort(i64*) #1 + +; Function Attrs: norecurse nounwind uwtable +define dso_local i32 @main() #5 { +entry: + ret i32 0 +} + +attributes #0 = { nounwind uwtable } +attributes #1 = { nounwind } +attributes #2 = { nounwind } +attributes #3 = { uwtable } +attributes #4 = { nobuiltin allocsize(0) } +attributes #5 = { norecurse nounwind uwtable } +attributes #6 = { builtin allocsize(0) } + +!llvm.module.flags = !{!0} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{!"branch_weights", i32 1, i32 1048575} +