Index: lib/Transforms/IPO/SampleProfile.cpp =================================================================== --- lib/Transforms/IPO/SampleProfile.cpp +++ lib/Transforms/IPO/SampleProfile.cpp @@ -606,6 +606,9 @@ /// /// \returns the weight for \p BB. ErrorOr SampleProfileLoader::getBlockWeight(const BasicBlock *BB) { + auto iter = BlockWeights.find(BB); + if (iter != BlockWeights.end()) + return iter->second; uint64_t Max = 0; bool HasWeight = false; for (auto &I : BB->getInstList()) { @@ -844,6 +847,8 @@ Instruction *DI = pgo::promoteIndirectCall(I, R->getValue(), C, Sum, false, ORE); Sum -= C; + BlockWeights[DI->getParent()] = C; + BlockWeights[I->getParent()] = Sum; PromotedInsns.insert(I); // If profile mismatches, we should not attempt to inline DI. if ((isa(DI) || isa(DI)) && Index: test/Transforms/SampleProfile/Inputs/indirect-call2.prof =================================================================== --- /dev/null +++ test/Transforms/SampleProfile/Inputs/indirect-call2.prof @@ -0,0 +1,31 @@ +test:63067:0 + 1: 3345 _Z3barv:1398 _Z3foov:2059 +test_inline:3000:8000 + 1: 8000 foo_inline3:1000 + 1: foo_inline1:3000 + 11: 3000 + 1: foo_inline2:4000 + 19: 4000 +test_noinline:3000:0 + 1: foo_noinline:3000 + 20: 3000 +test_direct:3000:0 + 1: foo_direct:3000 + 21: 3000 +test_inline_strip:3000:0 + 1: foo_inline_strip:3000 + 1: 3000 +test_inline_strip_conflict:3000:0 + 1: foo_inline_strip_conflict:3000 + 1: 3000 +test_norecursive_inline:3000:0 + 1: test_norecursive_inline:3000 + 20: 3000 +test_noinline_bitcast:3000:0 + 1: foo_direct_i32:3000 + 1: 3000 +return_arg_caller:3000:0 + 1: foo_inline1:3000 + 11: 3000 + 2: return_arg:3000 + 1: 3000 Index: test/Transforms/SampleProfile/indirect-call2.ll =================================================================== --- /dev/null +++ test/Transforms/SampleProfile/indirect-call2.ll @@ -0,0 +1,102 @@ +; facebook T38406375 +; this variant of indirect-call.ll fixes the profile data so that it +; is sensible (numbers are 
consistent with flow graph) +; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/indirect-call2.prof -S | FileCheck %s + +@x = global i32 0, align 4 +@y = global void ()* null, align 8 + +define void @test(void ()*) !dbg !3 { +; CHECK: define void @test(void ()*){{.*}} !prof ![[M0:[0-9]+]] { + %2 = alloca void ()* + store void ()* %0, void ()** %2 + %3 = load void ()*, void ()** %2 + call void %3(), !dbg !4 +; CHECK: call void %3(),{{.*}}, !prof ![[M1:[0-9]+]] + ret void +} + +; If the indirect call is promoted and inlined in the profile, we should promote and inline it. +define void @test_inline(i64* (i32*)*, i32* %x) !dbg !6 { +; CHECK: define void @test_inline(i64* (i32*)*, i32* %x){{.*}} !prof ![[M2:[0-9]+]] { + %2 = alloca i64* (i32*)* + store i64* (i32*)* %0, i64* (i32*)** %2 + %3 = load i64* (i32*)*, i64* (i32*)** %2 +; CHECK: icmp eq i64* (i32*)* %3, bitcast (i32* (i32*)* @foo_inline2 to i64* (i32*)*),{{.*}} +; CHECK: br i1 %4, label %if.true.direct_targ, label %if.false.orig_indirect,{{.*}}, !prof ![[M3:[0-9]+]] +; CHECK: if.true.direct_targ: +; CHECK: bitcast i32* %x to i64* +; CHECK: br label %if.end.icp,{{.*}} +; CHECK: if.false.orig_indirect: +; CHECK: icmp eq i64* (i32*)* %3, bitcast (i32* (i32*)* @foo_inline1 to i64* (i32*)*),{{.*}} +; CHECK: br i1 %6, label %if.true.direct_targ1, label %if.false.orig_indirect2,{{.*}}, !prof ![[M4:[0-9]+]] +; CHECK: if.true.direct_targ1: +; CHECK: bitcast i32* %x to i64* +; CHECK: br label %if.end.icp3,{{.*}} +; CHECK: if.false.orig_indirect2: + call i64* %3(i32* %x), !dbg !7 +; CHECK: call i64* %3(i32* %x),{{.*}}, !prof ![[M5:[0-9]+]] +; CHECK: br label %if.end.icp3,{{.*}} +; CHECK: if.end.icp3: +; CHECK: br label %if.end.icp,{{.*}} +; CHECK: if.end.icp: + ret void +} + +; If the indirect call is promoted and inlined in the profile, and the callee name +; is stripped, we should promote and inline it. (NOTE(review): no stripped-name test function is visible after this comment in this variant; confirm whether the comment is stale.) 
+ +define i32* @foo_inline1(i32* %x) !dbg !14 { +; CHECK: define i32* @foo_inline1(i32* %x){{.*}} !prof ![[M6:[0-9]+]] { + ret i32* %x +} + +define i32* @foo_inline2(i32* %x) !dbg !19 { +; CHECK: define i32* @foo_inline2(i32* %x){{.*}} !prof ![[M6]] { + ret i32* %x +} + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!2} +; CHECK: !llvm.module.flags = !{!2, !3} + +!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1) +!1 = !DIFile(filename: "test.cc", directory: "/") +!2 = !{i32 2, !"Debug Info Version", i32 3} +!3 = distinct !DISubprogram(name: "test", scope: !1, file: !1, line: 3, unit: !0) +; CHECK: ![[M0]] = !{!"function_entry_count", i64 1} +!4 = !DILocation(line: 4, scope: !3) +!5 = !DILocation(line: 6, scope: !3) +!6 = distinct !DISubprogram(name: "test_inline", scope: !1, file: !1, line: 6, unit: !0) +!7 = !DILocation(line: 7, scope: !6) +!8 = distinct !DISubprogram(name: "test_inline_strip", scope: !1, file: !1, line: 8, unit: !0) +!9 = !DILocation(line: 9, scope: !8) +!10 = distinct !DISubprogram(name: "test_inline_strip_conflict", scope: !1, file: !1, line: 10, unit: !0) +!11 = !DILocation(line: 11, scope: !10) +!12 = distinct !DISubprogram(name: "test_noinline", scope: !1, file: !1, line: 12, unit: !0) +!13 = !DILocation(line: 13, scope: !12) +!14 = distinct !DISubprogram(name: "foo_inline1", scope: !1, file: !1, line: 11, unit: !0) +!15 = distinct !DISubprogram(name: "foo_inline_strip.suffix", scope: !1, file: !1, line: 1, unit: !0) +!16 = distinct !DISubprogram(name: "foo_inline_strip_conflict.suffix1", scope: !1, file: !1, line: 1, unit: !0) +!17 = distinct !DISubprogram(name: "foo_inline_strip_conflict.suffix2", scope: !1, file: !1, line: 1, unit: !0) +!18 = distinct !DISubprogram(name: "foo_inline_strip_conflict.suffix3", scope: !1, file: !1, line: 1, unit: !0) +!19 = distinct !DISubprogram(name: "foo_inline2", scope: !1, file: !1, line: 19, unit: !0) +!20 = distinct !DISubprogram(name: "foo_noinline", scope: !1, file: !1, line: 20, 
unit: !0) +!21 = distinct !DISubprogram(name: "foo_direct", scope: !1, file: !1, line: 21, unit: !0) +!22 = distinct !DISubprogram(name: "test_direct", scope: !1, file: !1, line: 22, unit: !0) +!23 = !DILocation(line: 23, scope: !22) +!24 = distinct !DISubprogram(name: "test_norecursive_inline", scope: !1, file: !1, line: 12, unit: !0) +!25 = !DILocation(line: 13, scope: !24) +!26 = distinct !DISubprogram(name: "test_noinline_bitcast", scope: !1, file: !1, line: 12, unit: !0) +!27 = !DILocation(line: 13, scope: !26) +!28 = distinct !DISubprogram(name: "foo_direct_i32", scope: !1, file: !1, line: 11, unit: !0) +!29 = distinct !DISubprogram(name: "return_arg", scope: !1, file: !1, line: 11, unit: !0) +!30 = distinct !DISubprogram(name: "return_arg_caller", scope: !1, file: !1, line: 11, unit: !0) +!31 = !DILocation(line: 12, scope: !30) +!32 = !DILocation(line: 13, scope: !30) +; CHECK: ![[M1]] = !{!"VP", i32 0, i64 3457, i64 9191153033785521275, i64 2059, i64 -1069303473483922844, i64 1398} +; CHECK: ![[M2]] = !{!"function_entry_count", i64 8001} +; CHECK: ![[M3]] = !{!"branch_weights", i32 4000, i32 4000} +; CHECK: ![[M4]] = !{!"branch_weights", i32 3000, i32 1000} +; CHECK: ![[M5]] = !{!"VP", i32 0, i64 8000, i64 -6391416044382067764, i64 1000} +; CHECK: ![[M6]] = !{!"function_entry_count", i64 -1}