Index: llvm/include/llvm/IR/Function.h
===================================================================
--- llvm/include/llvm/IR/Function.h
+++ llvm/include/llvm/IR/Function.h
@@ -662,6 +662,10 @@
   /// create a Function) from the Function Src to this one.
   void copyAttributesFrom(const Function *Src);
 
+  /// markOptnoneIfPossible - add `optnone` (and the `noinline` it requires)
+  /// to this function unless it carries an attribute that conflicts with
+  /// `optnone` (minsize, optsize or alwaysinline); then it is left unchanged.
+  void markOptnoneIfPossible();
+
   /// deleteBody - This method deletes the body of the function, and converts
   /// the linkage to external.
   ///
Index: llvm/lib/IR/Function.cpp
===================================================================
--- llvm/lib/IR/Function.cpp
+++ llvm/lib/IR/Function.cpp
@@ -605,6 +605,19 @@
     setPrologueData(Src->getPrologueData());
 }
 
+/// Mark this function with `optnone` (plus the `noinline` it requires)
+/// unless it already carries an attribute that conflicts with `optnone`
+/// (minsize, optsize or alwaysinline); in that case leave it unchanged.
+void Function::markOptnoneIfPossible() {
+  if (hasFnAttribute(Attribute::MinSize) ||
+      hasFnAttribute(Attribute::OptimizeForSize) ||
+      hasFnAttribute(Attribute::AlwaysInline))
+    return;
+  // OptimizeNone requires NoInline.
+  addFnAttr(Attribute::OptimizeNone);
+  addFnAttr(Attribute::NoInline);
+}
+
 /// Table of string intrinsic names indexed by enum value.
 static const char * const IntrinsicNameTable[] = {
   "not_intrinsic",
Index: llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
===================================================================
--- llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
+++ llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
@@ -265,6 +265,16 @@
     PGOOldCFGHashing("pgo-instr-old-cfg-hashing", cl::init(false), cl::Hidden,
                      cl::desc("Use the old CFG function hashing"));
 
+// Command-line option to mark cold functions as Optnone.
+cl::opt<bool> OmitColdFuncOpt("profile-omit-cold-func-opt", cl::init(false),
+                              cl::Hidden,
+                              cl::desc("Add 'optnone' to cold functions "
+                                       "if possible"));
+cl::opt<unsigned> OmitColdFuncOptPercentage(
+    "profile-omit-cold-func-opt-percent", cl::init(0U), cl::Hidden,
+    cl::desc("Function at the # lower percentage would be "
+             "considered cold"));
+
 // Return a string describing the branch condition that can be
 // used in static branch probability heuristics:
 static std::string getBranchCondString(Instruction *TI) {
@@ -1663,11 +1673,26 @@
   std::vector<Function *> HotFunctions;
   std::vector<Function *> ColdFunctions;
 
+  if (OmitColdFuncOpt) {
+    // OmitColdFuncOptPercentage validation
+    if (OmitColdFuncOptPercentage > 100U) {
+      Ctx.diagnose(DiagnosticInfoPGOProfile(
+          ProfileFileName.data(),
+          "Invalid range of `-profile-omit-cold-func-opt-percent` "
+          "(should be within [0,100])"));
+      return false;
+    }
+  }
+
   // If the profile marked as always instrument the entry BB, do the
   // same. Note this can be overwritten by the internal option in CFGMST.h
   bool InstrumentFuncEntry = PGOReader->instrEntryBBEnabled();
   if (PGOInstrumentEntry.getNumOccurrences() > 0)
     InstrumentFuncEntry = PGOInstrumentEntry;
+
+  using FuncCountEntry = std::pair<Function *, CountSumOrPercent>;
+  SmallVector<FuncCountEntry, 8> FuncCounts;
+
   for (auto &F : M) {
     if (F.isDeclaration())
       continue;
@@ -1687,12 +1712,18 @@
     bool AllZeros = false;
     if (!Func.readCounters(PGOReader.get(), AllZeros, AllMinusOnes))
       continue;
+
     if (AllZeros) {
       F.setEntryCount(ProfileCount(0, Function::PCT_Real));
       if (Func.getProgramMaxCount() != 0)
         ColdFunctions.push_back(&F);
       continue;
     }
+
+    CountSumOrPercent FuncAccumCount;
+    Func.getProfileRecord().accumulateCounts(FuncAccumCount);
+    FuncCounts.emplace_back(&F, std::move(FuncAccumCount));
+
     const unsigned MultiplyFactor = 3;
     if (AllMinusOnes) {
       uint64_t HotThreshold = PSI->getHotCountThreshold();
@@ -1741,6 +1772,29 @@
     }
   }
 
+  if (OmitColdFuncOpt) {
+    // Average count per counter; 0 when the record has no counters, which
+    // avoids a division by zero.
+    auto AvgCount = [](const CountSumOrPercent &Sum) {
+      return Sum.NumEntries
+                 ? static_cast<double>(Sum.CountSum) / Sum.NumEntries
+                 : 0.0;
+    };
+    // Order functions from coldest to hottest. A stable sort keeps module
+    // order between functions with equal averages, so the selection below
+    // is deterministic.
+    llvm::stable_sort(
+        FuncCounts, [&](const FuncCountEntry &LHS, const FuncCountEntry &RHS) {
+          return AvgCount(LHS.second) < AvgCount(RHS.second);
+        });
+
+    // Treat the lowest OmitColdFuncOptPercentage percent as cold.
+    const size_t NumCold =
+        FuncCounts.size() * OmitColdFuncOptPercentage / 100U;
+    for (size_t I = 0; I < NumCold; ++I)
+      ColdFunctions.push_back(FuncCounts[I].first);
+  }
+
   // Set function hotness attribute from the profile.
   // We have to apply these attributes at the end because their presence
   // can affect the BranchProbabilityInfo of any callers, resulting in an
@@ -1754,6 +1808,13 @@
     F->addFnAttr(Attribute::Cold);
     LLVM_DEBUG(dbgs() << "Set cold attribute to function: " << F->getName()
                       << "\n");
+    // Optionally mark optnone as well; report it only if it was applied.
+    if (OmitColdFuncOpt) {
+      F->markOptnoneIfPossible();
+      if (F->hasFnAttribute(Attribute::OptimizeNone))
+        LLVM_DEBUG(dbgs() << "Set optnone attribute to function: "
+                          << F->getName() << "\n");
+    }
   }
   return true;
 }
Index: llvm/test/Transforms/PGOProfile/Inputs/omit-cold-func-opt.proftext
===================================================================
--- /dev/null
+++ llvm/test/Transforms/PGOProfile/Inputs/omit-cold-func-opt.proftext
@@ -0,0 +1,108 @@
+# IR level Instrumentation Flag
+:ir
+func_0
+# Func Hash:
+742261418966908927
+# Num Counters:
+1
+# Counter Values:
+0
+
+func_10
+# Func Hash:
+742261418966908927
+# Num Counters:
+1
+# Counter Values:
+3
+
+func_100
+# Func Hash:
+742261418966908927
+# Num Counters:
+1
+# Counter Values:
+12
+
+func_20
+# Func Hash:
+742261418966908927
+# Num Counters:
+1
+# Counter Values:
+4
+
+func_30
+# Func Hash:
+742261418966908927
+# Num Counters:
+1
+# Counter Values:
+5
+
+func_40
+# Func Hash:
+742261418966908927
+# Num Counters:
+1
+# Counter Values:
+6
+
+func_50
+# Func Hash:
+742261418966908927
+# Num Counters:
+1
+# Counter Values:
+7
+
+func_60
+# Func Hash:
+742261418966908927
+# Num Counters:
+1
+# Counter Values:
+8
+
+func_70
+# Func Hash:
+742261418966908927
+# Num Counters:
+1
+# Counter Values:
+9
+
+func_80
+# Func Hash:
+742261418966908927
+# Num Counters:
+1
+# Counter Values:
+10
+
+func_90
+# Func Hash:
+742261418966908927
+# Num Counters:
+1
+# Counter Values:
+11
+
+main
+# Func Hash:
+950994589843891609
+# Num Counters:
+11
+# Counter Values:
+4
+5
+6
+7
+8
+9
+10
+11
+12
+3
+1
+
Index: llvm/test/Transforms/PGOProfile/omit-cold-func-opt.ll
===================================================================
--- /dev/null
+++ llvm/test/Transforms/PGOProfile/omit-cold-func-opt.ll
@@ -0,0 +1,127 @@
+; ModuleID = './test_disopt_cold.c'
+
+; RUN: llvm-profdata merge %S/Inputs/omit-cold-func-opt.proftext -o %t.profdata
+
+source_filename = "./test_disopt_cold.c"
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; RUN: opt < %s -pgo-instr-use -pgo-test-profile-file=%t.profdata -profile-omit-cold-func-opt -S -o - | FileCheck --check-prefix=CHECK-0-PERCENT %s
+
+; RUN: opt < %s -pgo-instr-use -pgo-test-profile-file=%t.profdata -profile-omit-cold-func-opt -profile-omit-cold-func-opt-percent=25 -S -o - | FileCheck --check-prefix=CHECK-25-PERCENT %s
+
+; RUN: opt < %s -pgo-instr-use -pgo-test-profile-file=%t.profdata -profile-omit-cold-func-opt -profile-omit-cold-func-opt-percent=50 -S -o - | FileCheck --check-prefix=CHECK-50-PERCENT %s
+
+; CHECK-0-PERCENT: optnone
+; CHECK-0-PERCENT-NEXT: define {{.*}} i32 @func_0
+; CHECK-0-PERCENT-NOT: optnone
+; CHECK-0-PERCENT: define {{.*}} i32 @func_100
+
+; CHECK-25-PERCENT: optnone
+; CHECK-25-PERCENT-NEXT: define {{.*}} i32 @func_0
+; CHECK-25-PERCENT: optnone
+; CHECK-25-PERCENT-NEXT: define {{.*}} i32 @func_10
+; CHECK-25-PERCENT: optnone
+; CHECK-25-PERCENT-NEXT: define {{.*}} i32 @func_20
+; CHECK-25-PERCENT-NOT: optnone
+; CHECK-25-PERCENT: define {{.*}} i32 @func_100
+
+; CHECK-50-PERCENT: optnone
+; CHECK-50-PERCENT-NEXT: define {{.*}} i32 @func_0
+; CHECK-50-PERCENT: optnone
+; CHECK-50-PERCENT-NEXT: define {{.*}} i32 @func_10
+; CHECK-50-PERCENT: optnone
+; CHECK-50-PERCENT-NEXT: define {{.*}} i32 @func_20
+; CHECK-50-PERCENT: optnone
+; CHECK-50-PERCENT-NEXT: define {{.*}} i32 @func_30
+; CHECK-50-PERCENT: optnone
+; CHECK-50-PERCENT-NEXT: define {{.*}} i32 @func_40
+; CHECK-50-PERCENT: optnone
+; CHECK-50-PERCENT-NEXT: define {{.*}} i32 @func_50
+; CHECK-50-PERCENT-NOT: optnone
+; CHECK-50-PERCENT: define {{.*}} i32 @func_100
+
+; Function Attrs: norecurse nounwind readnone uwtable
+define dso_local i32 @func_0(i32 %x) local_unnamed_addr #0 {
+entry:
+  %add = add nsw i32 %x, 94
+  ret i32 %add
+}
+
+; Function Attrs: norecurse nounwind readnone uwtable
+define dso_local i32 @func_10(i32 %x) local_unnamed_addr #0 {
+entry:
+  %add = add nsw i32 %x, 894794
+  ret i32 %add
+}
+
+; Function Attrs: norecurse nounwind readnone uwtable
+define dso_local i32 @func_20(i32 %x) local_unnamed_addr #0 {
+entry:
+  %add = add nsw i32 %x, 894794
+  ret i32 %add
+}
+
+; Function Attrs: norecurse nounwind readnone uwtable
+define dso_local i32 @func_30(i32 %x) local_unnamed_addr #0 {
+entry:
+  %add = add nsw i32 %x, 994874
+  ret i32 %add
+}
+
+; Function Attrs: norecurse nounwind readnone uwtable
+define dso_local i32 @func_40(i32 %x) local_unnamed_addr #0 {
+entry:
+  %add = add nsw i32 %x, 879944
+  ret i32 %add
+}
+
+; Function Attrs: norecurse nounwind readnone uwtable
+define dso_local i32 @func_50(i32 %x) local_unnamed_addr #0 {
+entry:
+  %add = add nsw i32 %x, 994487
+  ret i32 %add
+}
+
+; Function Attrs: norecurse nounwind readnone uwtable
+define dso_local i32 @func_60(i32 %x) local_unnamed_addr #0 {
+entry:
+  %add = add nsw i32 %x, 984794
+  ret i32 %add
+}
+
+; Function Attrs: norecurse nounwind readnone uwtable
+define dso_local i32 @func_70(i32 %x) local_unnamed_addr #0 {
+entry:
+  %add = add nsw i32 %x, 949487
+  ret i32 %add
+}
+
+; Function Attrs: norecurse nounwind readnone uwtable
+define dso_local i32 @func_80(i32 %x) local_unnamed_addr #0 {
+entry:
+  %add = add nsw i32 %x, 948794
+  ret i32 %add
+}
+
+; Function Attrs: norecurse nounwind readnone uwtable
+define dso_local i32 @func_90(i32 %x) local_unnamed_addr #0 {
+entry:
+  %add = add nsw i32 %x, 9487
+  ret i32 %add
+}
+
+; Function Attrs: norecurse nounwind readnone uwtable
+define dso_local i32 @func_100(i32 %x) local_unnamed_addr #0 {
+entry:
+  %add = add nsw i32 %x, 87
+  ret i32 %add
+}
+
+attributes #0 = { norecurse nounwind readnone uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
+!llvm.module.flags = !{!0}
+!llvm.ident = !{!1}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{!"https://github.com/llvm/llvm-project"}