Index: lib/Transforms/IPO/PassManagerBuilder.cpp =================================================================== --- lib/Transforms/IPO/PassManagerBuilder.cpp +++ lib/Transforms/IPO/PassManagerBuilder.cpp @@ -125,6 +125,15 @@ "enable-loop-versioning-licm", cl::init(false), cl::Hidden, cl::desc("Enable the experimental Loop Versioning LICM pass")); +static cl::opt + DisablePreInliner("disable-preinline", cl::init(false), cl::Hidden, + cl::desc("Disable pre-instrumentation inliner")); + +static cl::opt PreInlineThreshold( + "preinline-threshold", cl::Hidden, cl::init(75), cl::ZeroOrMore, + cl::desc("Control the amount of inlining in pre-instrumentation inliner " + "(default = 75)")); + PassManagerBuilder::PassManagerBuilder() { OptLevel = 2; SizeLevel = 0; @@ -228,6 +237,19 @@ // Do PGO instrumentation generation or use pass as the option specified. void PassManagerBuilder::addPGOInstrPasses(legacy::PassManagerBase &MPM) { + if (PGOInstrGen.empty() && PGOInstrUse.empty()) + return; + // Perform the preinline and cleanup passes for O1 and above. + // And avoid doing them if optimizing for size. + if (OptLevel > 0 && SizeLevel == 0 && !DisablePreInliner) { + // Create preinline pass. + MPM.add(createFunctionInliningPass(PreInlineThreshold)); + MPM.add(createSROAPass()); + MPM.add(createEarlyCSEPass()); // Catch trivial redundancies + MPM.add(createCFGSimplificationPass()); // Merge & remove BBs + MPM.add(createInstructionCombiningPass()); // Combine silly seq's + addExtensionsToPM(EP_Peephole, MPM); + } if (!PGOInstrGen.empty()) { MPM.add(createPGOInstrumentationGenLegacyPass()); // Add the profile lowering pass. Index: test/Transforms/PGOProfile/preinline.ll =================================================================== --- /dev/null +++ test/Transforms/PGOProfile/preinline.ll @@ -0,0 +1,20 @@ +; RUN: opt < %s -O2 -profile-generate=default.profraw -S | FileCheck %s --check-prefix=GEN +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +define i32 @foo(i32 %i) { +entry: +; GEN: %pgocount = load i64, i64* getelementptr inbounds ([1 x i64], [1 x i64]* @__profc_foo +; GEN-NOT: %pgocount.i = load i64, i64* getelementptr inbounds ([1 x i64], [1 x i64]* @__profc__stdin__bar + %call = call i32 @bar() + %add = add nsw i32 %i, %call + ret i32 %add +} + +define internal i32 @bar() { +entry: + %call = call i32 (...) @bar1() + ret i32 %call +} + +declare i32 @bar1(...)