diff --git a/llvm/lib/Transforms/IPO/SampleProfile.cpp b/llvm/lib/Transforms/IPO/SampleProfile.cpp --- a/llvm/lib/Transforms/IPO/SampleProfile.cpp +++ b/llvm/lib/Transforms/IPO/SampleProfile.cpp @@ -29,6 +29,7 @@ #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Twine.h" @@ -92,6 +93,11 @@ using ProfileCount = Function::ProfileCount; #define DEBUG_TYPE "sample-profile" +STATISTIC(NumCSInlined, + "Number of functions inlined with context sensitive profile"); +STATISTIC(NumCSNotInlined, + "Number of functions not inlined with context sensitive profile"); + // Command line option to specify the file to read samples from. This is // mainly used for debugging. static cl::opt SampleProfileFile( @@ -870,14 +876,14 @@ getInlineCost(cast(*I), Params, GetTTI(*CalledFunction), GetAC, None, nullptr, nullptr); if (Cost.isNever()) { - ORE->emit(OptimizationRemark(DEBUG_TYPE, "Not inline", DLoc, BB) + ORE->emit(OptimizationRemark("sample-profile-inline", "NotInline", DLoc, BB) << "incompatible inlining"); return false; } InlineFunctionInfo IFI(nullptr, &GetAC); if (InlineFunction(CS, IFI)) { // The call to InlineFunction erases I, so we can't pass it here. - ORE->emit(OptimizationRemark(DEBUG_TYPE, "HotInline", DLoc, BB) + ORE->emit(OptimizationRemark("sample-profile-inline", "HotInline", DLoc, BB) << "inlined hot callee '" << ore::NV("Callee", CalledFunction) << "' into '" << ore::NV("Caller", BB->getParent()) << "'"); return true; @@ -932,6 +938,18 @@ if (Hot) { CIS.insert(CIS.begin(), Candidates.begin(), Candidates.end()); } + else { + for (auto I : Candidates) { + Function *CalledFunction = CallSite(I).getCalledFunction(); + if (CalledFunction) { + ORE->emit(OptimizationRemark("sample-profile-inline", "NotInline", + I->getDebugLoc(), I->getParent()) + << "previous inline not repeated '" + << ore::NV("Callee", CalledFunction) << "' into '" + << ore::NV("Caller", &F) << "'"); + } + } + } } for (auto I : CIS) { Function *CalledFunction = CallSite(I).getCalledFunction(); @@ -976,6 +994,7 @@ inlineCallInstruction(DI)) { localNotInlinedCallSites.erase(I); LocalChanged = true; + ++NumCSInlined; } } else { LLVM_DEBUG(dbgs() @@ -988,6 +1007,7 @@ if (inlineCallInstruction(I)) { localNotInlinedCallSites.erase(I); LocalChanged = true; + ++NumCSInlined; } } else if (IsThinLTOPreLink) { findCalleeFunctionSamples(*I)->findInlinedFunctions( @@ -1011,6 +1031,7 @@ auto pair = notInlinedCallInfo.try_emplace(Callee, NotInlinedProfileInfo{0}); pair.first->second.entryCount += FS->getEntrySamples(); + ++NumCSNotInlined; } return Changed; } diff --git a/llvm/test/Transforms/SampleProfile/inline-stats.ll b/llvm/test/Transforms/SampleProfile/inline-stats.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/SampleProfile/inline-stats.ll @@ -0,0 +1,110 @@ +; REQUIRES: asserts +; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/inline.prof -stats -S 2>&1 | FileCheck %s + +; Original C++ test case +; +; #include +; +; int sum(int x, int y) { +; return x + y; +; } +; +; int main() { +; int s, i = 0; +; while (i++ < 20000 * 20000) +; if (i != 100) s = sum(i, s); else s = 30; +; printf("sum is %d\n", s); +; return 0; +; } +; +@.str = private unnamed_addr constant [11 x i8] c"sum is %d\0A\00", align 1 + +; Function Attrs: nounwind uwtable +define i32 @_Z3sumii(i32 %x, i32 %y) !dbg !4 { +entry: + %x.addr = alloca i32, align 4 + %y.addr = alloca i32, align 4 + store i32 %x, i32* %x.addr, align 4 + store i32 %y, i32* %y.addr, align 4 + %0 = load i32, i32* %x.addr, align 4, !dbg !11 + %1 = load i32, i32* %y.addr, align 4, !dbg !11 + %add = add nsw i32 %0, %1, !dbg !11 + ret i32 %add, !dbg !11 +} + +; Function Attrs: uwtable +define i32 @main() !dbg !7 { +entry: + %retval = alloca i32, align 4 + %s = alloca i32, align 4 + %i = alloca i32, align 4 + store i32 0, i32* %retval + store i32 0, i32* %i, align 4, !dbg !12 + br label %while.cond, !dbg !13 + +while.cond: ; preds = %if.end, %entry + %0 = load i32, i32* %i, align 4, !dbg !14 + %inc = add nsw i32 %0, 1, !dbg !14 + store i32 %inc, i32* %i, align 4, !dbg !14 + %cmp = icmp slt i32 %0, 400000000, !dbg !14 + br i1 %cmp, label %while.body, label %while.end, !dbg !14 + +while.body: ; preds = %while.cond + %1 = load i32, i32* %i, align 4, !dbg !16 + %cmp1 = icmp ne i32 %1, 100, !dbg !16 + br i1 %cmp1, label %if.then, label %if.else, !dbg !16 + + +if.then: ; preds = %while.body + %2 = load i32, i32* %i, align 4, !dbg !18 + %3 = load i32, i32* %s, align 4, !dbg !18 + %call = call i32 @_Z3sumii(i32 %2, i32 %3), !dbg !18 + store i32 %call, i32* %s, align 4, !dbg !18 + br label %if.end, !dbg !18 + +if.else: ; preds = %while.body + store i32 30, i32* %s, align 4, !dbg !20 + br label %if.end + +if.end: ; preds = %if.else, %if.then + br label %while.cond, !dbg !22 + +while.end: ; preds = %while.cond + %4 = load i32, i32* %s, align 4, !dbg !24 + %call2 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i32 0, i32 0), i32 %4), !dbg !24 + ret i32 0, !dbg !25 +} + +declare i32 @printf(i8*, ...) #2 + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!8, !9} +!llvm.ident = !{!10} + +!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.5 ", isOptimized: false, emissionKind: NoDebug, file: !1, enums: !2, retainedTypes: !2, globals: !2, imports: !2) +!1 = !DIFile(filename: "calls.cc", directory: ".") +!2 = !{} +!4 = distinct !DISubprogram(name: "sum", line: 3, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, unit: !0, scopeLine: 3, file: !1, scope: !5, type: !6, retainedNodes: !2) +!5 = !DIFile(filename: "calls.cc", directory: ".") +!6 = !DISubroutineType(types: !2) +!7 = distinct !DISubprogram(name: "main", line: 7, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, unit: !0, scopeLine: 7, file: !1, scope: !5, type: !6, retainedNodes: !2) +!8 = !{i32 2, !"Dwarf Version", i32 4} +!9 = !{i32 1, !"Debug Info Version", i32 3} +!10 = !{!"clang version 3.5 "} +!11 = !DILocation(line: 4, scope: !4) +!12 = !DILocation(line: 8, scope: !7) +!13 = !DILocation(line: 9, scope: !7) +!14 = !DILocation(line: 9, scope: !15) +!15 = !DILexicalBlockFile(discriminator: 2, file: !1, scope: !7) +!16 = !DILocation(line: 10, scope: !17) +!17 = distinct !DILexicalBlock(line: 10, column: 0, file: !1, scope: !7) +!18 = !DILocation(line: 10, scope: !19) +!19 = !DILexicalBlockFile(discriminator: 2, file: !1, scope: !17) +!20 = !DILocation(line: 10, scope: !21) +!21 = !DILexicalBlockFile(discriminator: 4, file: !1, scope: !17) +!22 = !DILocation(line: 10, scope: !23) +!23 = !DILexicalBlockFile(discriminator: 6, file: !1, scope: !17) +!24 = !DILocation(line: 11, scope: !7) +!25 = !DILocation(line: 12, scope: !7) + +; CHECK: 1 sample-profile - Number of functions inlined with context sensitive profile \ No newline at end of file