Add a "use-sample-profile" string function attribute.

clang attaches the attribute to each function it emits when a sample profile
file is configured (-fprofile-sample-use=<file>). The SampleProfile pass only
adds functions carrying the attribute to its processing order, and only
considers such functions as isLegalToPromote() candidates. An "isEqual"
CompatRule is registered for the attribute, and the new inliner tests check
that a caller and callee that disagree on it are not inlined into each other.

Index: clang/lib/CodeGen/CodeGenFunction.cpp
===================================================================
--- clang/lib/CodeGen/CodeGenFunction.cpp
+++ clang/lib/CodeGen/CodeGenFunction.cpp
@@ -853,6 +853,10 @@
   if (CGM.getCodeGenOpts().ProfileSampleAccurate)
     Fn->addFnAttr("profile-sample-accurate");
 
+  // Add use-sample-profile value.
+  if (!CGM.getCodeGenOpts().SampleProfileFile.empty())
+    Fn->addFnAttr("use-sample-profile");
+
   if (D && D->hasAttr<CFICanonicalJumpTableAttr>())
     Fn->addFnAttr("cfi-canonical-jump-table");
 
Index: clang/test/CodeGen/use-sample-profile-attr.c
===================================================================
--- /dev/null
+++ clang/test/CodeGen/use-sample-profile-attr.c
@@ -0,0 +1,14 @@
+// Test use-sample-profile attribute is present only when SampleFDO
+// is enabled.
+//
+// RUN: %clang_cc1 -O2 -fno-experimental-new-pass-manager -fprofile-sample-use=%S/Inputs/pgo-sample.prof %s -emit-llvm -o - 2>&1 | FileCheck %s
+// RUN: %clang_cc1 -O2 -fexperimental-new-pass-manager -fprofile-sample-use=%S/Inputs/pgo-sample.prof %s -emit-llvm -o - 2>&1 | FileCheck %s
+// RUN: %clang_cc1 -O2 -fno-experimental-new-pass-manager %s -emit-llvm -o - 2>&1 | FileCheck %s --check-prefix=NOATTR
+// RUN: %clang_cc1 -O2 -fexperimental-new-pass-manager %s -emit-llvm -o - 2>&1 | FileCheck %s --check-prefix=NOATTR
+
+// CHECK: define{{.*}} @func{{.*}} #[[ATTRID:[0-9]+]]
+// CHECK: attributes #[[ATTRID]] = {{.*}} "use-sample-profile"
+// NOATTR: define{{.*}} @func{{.*}} #[[ATTRID:[0-9]+]]
+// NOATTR-NOT: attributes #[[ATTRID]] = {{.*}} "use-sample-profile"
+
+int func(int a) { return a; }
Index: llvm/include/llvm/IR/Attributes.td
===================================================================
--- llvm/include/llvm/IR/Attributes.td
+++ llvm/include/llvm/IR/Attributes.td
@@ -234,6 +234,7 @@
 def NoJumpTables : StrBoolAttr<"no-jump-tables">;
 def NoInlineLineTables : StrBoolAttr<"no-inline-line-tables">;
 def ProfileSampleAccurate : StrBoolAttr<"profile-sample-accurate">;
+def UseSampleProfile : StrBoolAttr<"use-sample-profile">;
 
 class CompatRule<string F> {
   // The name of the function called to check the attribute of the caller and
@@ -252,6 +253,7 @@
 def : CompatRule<"isEqual<SanitizeMemTagAttr>">;
 def : CompatRule<"isEqual<SafeStackAttr>">;
 def : CompatRule<"isEqual<ShadowCallStackAttr>">;
+def : CompatRule<"isEqual<UseSampleProfileAttr>">;
 
 class MergeRule<string F> {
   // The name of the function called to merge the attributes of the caller and
Index: llvm/lib/Transforms/IPO/SampleProfile.cpp
===================================================================
--- llvm/lib/Transforms/IPO/SampleProfile.cpp
+++ llvm/lib/Transforms/IPO/SampleProfile.cpp
@@ -1042,6 +1042,7 @@
         if (R != SymbolMap.end() && R->getValue() &&
             !R->getValue()->isDeclaration() &&
             R->getValue()->getSubprogram() &&
+            R->getValue()->hasFnAttribute("use-sample-profile") &&
             isLegalToPromote(*I, R->getValue(), &Reason)) {
           uint64_t C = FS->getEntrySamples();
           auto &DI =
@@ -1785,7 +1786,7 @@
 
   if (!ProfileTopDownLoad || CG == nullptr) {
     for (Function &F : M)
-      if (!F.isDeclaration())
+      if (!F.isDeclaration() && F.hasFnAttribute("use-sample-profile"))
         FunctionOrderList.push_back(&F);
     return FunctionOrderList;
   }
@@ -1795,7 +1796,7 @@
   while (!CGI.isAtEnd()) {
     for (CallGraphNode *node : *CGI) {
       auto F = node->getFunction();
-      if (F && !F->isDeclaration())
+      if (F && !F->isDeclaration() && F->hasFnAttribute("use-sample-profile"))
         FunctionOrderList.push_back(F);
     }
     ++CGI;
Index: llvm/test/Transforms/Inline/inline-incompat-attrs.ll
===================================================================
--- /dev/null
+++ llvm/test/Transforms/Inline/inline-incompat-attrs.ll
@@ -0,0 +1,51 @@
+; RUN: opt < %s -passes=inline -inline-threshold=100 -S | FileCheck %s
+
+; caller1/caller2/callee1/callee2 test functions with incompatible attributes
+; won't be inlined into each other.
+
+define i32 @callee1(i32 %x) {
+  %x1 = add i32 %x, 1
+  %x2 = add i32 %x1, 1
+  %x3 = add i32 %x2, 1
+  call void @extern()
+  ret i32 %x3
+}
+
+define i32 @callee2(i32 %x) #0 {
+  %x1 = add i32 %x, 1
+  %x2 = add i32 %x1, 1
+  %x3 = add i32 %x2, 1
+  call void @extern()
+  ret i32 %x3
+}
+
+define i32 @caller1(i32 %y1) {
+; caller1 doesn't have use-sample-profile attribute but callee2 has,
+; so callee2 won't be inlined into caller1.
+; caller1 and callee1 don't have use-sample-profile attribute, so
+; callee1 can be inlined into caller1.
+; CHECK-LABEL: @caller1(
+; CHECK: call i32 @callee2
+; CHECK-NOT: call i32 @callee1
+  %y2 = call i32 @callee2(i32 %y1)
+  %y3 = call i32 @callee1(i32 %y2)
+  ret i32 %y3
+}
+
+define i32 @caller2(i32 %y1) #0 {
+; caller2 and callee2 both have use-sample-profile attribute, so
+; callee2 can be inlined into caller2.
+; caller2 has use-sample-profile attribute but callee1 doesn't have,
+; so callee1 won't be inlined into caller2.
+; CHECK-LABEL: @caller2(
+; CHECK-NOT: call i32 @callee2
+; CHECK: call i32 @callee1
+  %y2 = call i32 @callee2(i32 %y1)
+  %y3 = call i32 @callee1(i32 %y2)
+  ret i32 %y3
+}
+
+declare void @extern()
+
+attributes #0 = { "use-sample-profile" }
+
Index: llvm/test/Transforms/Inline/partial-inline-incompat-attrs.ll
===================================================================
--- /dev/null
+++ llvm/test/Transforms/Inline/partial-inline-incompat-attrs.ll
@@ -0,0 +1,49 @@
+; RUN: opt < %s -passes=partial-inliner -S 2>&1| FileCheck %s
+
+define i32 @callee1(i32 %arg) {
+bb:
+  %tmp = icmp slt i32 %arg, 0
+  br i1 %tmp, label %bb1, label %bb2
+
+bb1:
+  br i1 undef, label %bb4, label %bb2
+
+bb2:
+  br i1 undef, label %bb4, label %bb5
+
+bb4:
+  %xx1 = phi i32 [ 1, %bb1 ], [ 9, %bb2 ]
+  %xx2 = phi i32 [ 1, %bb1 ], [ 9, %bb2 ]
+  %xx3 = phi i32 [ 1, %bb1 ], [ 9, %bb2 ]
+  tail call void (...) @extern() #2
+  br label %bb5
+
+bb5:
+  %tmp6 = phi i32 [ 1, %bb2 ], [ 9, %bb4 ]
+  ret i32 %tmp6
+}
+
+declare void @extern(...)
+
+define i32 @caller1(i32 %arg) {
+bb:
+; partial inliner inlines callee to caller.
+; CHECK-LABEL: @caller1
+; CHECK: br i1
+; CHECK: br i1
+; CHECK-NOT: call i32 @callee1(
+  %tmp = tail call i32 @callee1(i32 %arg)
+  ret i32 %tmp
+}
+
+define i32 @caller2(i32 %arg) #0 {
+bb:
+; partial inliner won't inline callee to caller because they have
+; incompatible attributes.
+; CHECK-LABEL: @caller2
+; CHECK: call i32 @callee1(
+  %tmp = tail call i32 @callee1(i32 %arg)
+  ret i32 %tmp
+}
+
+attributes #0 = { "use-sample-profile" }
Index: llvm/test/Transforms/SampleProfile/Inputs/use-sample-profile-attr.prof
===================================================================
--- /dev/null
+++ llvm/test/Transforms/SampleProfile/Inputs/use-sample-profile-attr.prof
@@ -0,0 +1,15 @@
+foo:5860:10
+ 0: 1820
+ 1: 1820
+ 2: 1820
+goo:1820:20
+ 0: 1820
+main:225715:1
+ 2.1: 5553
+ 3: 5391
+ 3.1: foo:5860
+  0: 5279
+  1: 5279
+  2: 5279
+ 4: goo:5860
+  1: 5860
Index: llvm/test/Transforms/SampleProfile/use-sample-profile-attr.ll
===================================================================
--- /dev/null
+++ llvm/test/Transforms/SampleProfile/use-sample-profile-attr.ll
@@ -0,0 +1,118 @@
+; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/use-sample-profile-attr.prof -S | FileCheck %s
+; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/use-sample-profile-attr.prof -S | FileCheck %s
+
+@.str = private unnamed_addr constant [11 x i8] c"sum is %d\0A\00", align 1
+
+; @goo doesn't have use-sample-profile attribute so it won't have
+; profile annotated.
+; CHECK-NOT: @goo{{.*}} !prof
+define void @goo() !dbg !26 {
+  ret void
+}
+
+; @foo has use-sample-profile attribute so it will have profile annotated.
+; CHECK: @foo{{.*}} !prof ![[HDRCNT1:[0-9]+]]
+define i32 @foo(i32 %x, i32 %y) #0 !dbg !4 {
+entry:
+  %x.addr = alloca i32, align 4
+  %y.addr = alloca i32, align 4
+  store i32 %x, i32* %x.addr, align 4
+  store i32 %y, i32* %y.addr, align 4
+  %0 = load i32, i32* %x.addr, align 4, !dbg !11
+  %1 = load i32, i32* %y.addr, align 4, !dbg !11
+  %add = add nsw i32 %0, %1, !dbg !11
+  ret i32 %add, !dbg !11
+}
+
+; @main has use-sample-profile attribute so it will have profile annotated.
+; CHECK: @main{{.*}} !prof ![[HDRCNT2:[0-9]+]]
+define i32 @main() #1 !dbg !7 {
+entry:
+  %retval = alloca i32, align 4
+  %s = alloca i32, align 4
+  %i = alloca i32, align 4
+  store i32 0, i32* %retval
+  store i32 0, i32* %i, align 4, !dbg !12
+  br label %while.cond, !dbg !13
+
+while.cond:                                       ; preds = %if.end, %entry
+  %0 = load i32, i32* %i, align 4, !dbg !14
+  %inc = add nsw i32 %0, 1, !dbg !14
+  store i32 %inc, i32* %i, align 4, !dbg !14
+  %cmp = icmp slt i32 %0, 400000000, !dbg !14
+  br i1 %cmp, label %while.body, label %while.end, !dbg !14
+
+while.body:                                       ; preds = %while.cond
+  %1 = load i32, i32* %i, align 4, !dbg !16
+  %cmp1 = icmp ne i32 %1, 100, !dbg !16
+  br i1 %cmp1, label %if.then, label %if.else, !dbg !16
+
+
+if.then:                                          ; preds = %while.body
+  %2 = load i32, i32* %i, align 4, !dbg !18
+  %3 = load i32, i32* %s, align 4, !dbg !18
+; @foo is inlined because the callsite is hot and @foo has use-sample-profile
+; attribute.
+; CHECK: if.then:
+; CHECK-NOT: call i32 @foo
+  %call = call i32 @foo(i32 %2, i32 %3), !dbg !18
+  store i32 %call, i32* %s, align 4, !dbg !18
+  br label %if.end, !dbg !18
+
+if.else:                                          ; preds = %while.body
+; @goo is not inlined because @goo doesn't have use-sample-profile attribute.
+; CHECK: if.else:
+; CHECK: call void @goo
+  call void @goo(), !dbg !27
+  store i32 30, i32* %s, align 4, !dbg !20
+  br label %if.end
+
+if.end:                                           ; preds = %if.else, %if.then
+  br label %while.cond, !dbg !22
+
+while.end:                                        ; preds = %while.cond
+  %4 = load i32, i32* %s, align 4, !dbg !24
+  %call2 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i32 0, i32 0), i32 %4), !dbg !24
+  ret i32 0, !dbg !25
+}
+
+; CHECK: ![[HDRCNT1]] = !{!"function_entry_count", i64 11}
+; CHECK: ![[HDRCNT2]] = !{!"function_entry_count", i64 2}
+
+attributes #0 = {"use-sample-profile"}
+attributes #1 = {"use-sample-profile"}
+
+declare i32 @printf(i8*, ...)
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!8, !9}
+!llvm.ident = !{!10}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.5 ", isOptimized: false, emissionKind: NoDebug, file: !1, enums: !2, retainedTypes: !2, globals: !2, imports: !2)
+!1 = !DIFile(filename: "calls.cc", directory: ".")
+!2 = !{}
+!4 = distinct !DISubprogram(name: "foo", line: 3, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, unit: !0, scopeLine: 3, file: !1, scope: !5, type: !6, retainedNodes: !2)
+!5 = !DIFile(filename: "calls.cc", directory: ".")
+!6 = !DISubroutineType(types: !2)
+!7 = distinct !DISubprogram(name: "main", line: 7, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, unit: !0, scopeLine: 7, file: !1, scope: !5, type: !6, retainedNodes: !2)
+!8 = !{i32 2, !"Dwarf Version", i32 4}
+!9 = !{i32 1, !"Debug Info Version", i32 3}
+!10 = !{!"clang version 3.5 "}
+!11 = !DILocation(line: 4, scope: !4)
+!12 = !DILocation(line: 8, scope: !7)
+!13 = !DILocation(line: 9, scope: !7)
+!14 = !DILocation(line: 9, scope: !15)
+!15 = !DILexicalBlockFile(discriminator: 2, file: !1, scope: !7)
+!16 = !DILocation(line: 10, scope: !17)
+!17 = distinct !DILexicalBlock(line: 10, column: 0, file: !1, scope: !7)
+!18 = !DILocation(line: 10, scope: !19)
+!19 = !DILexicalBlockFile(discriminator: 2, file: !1, scope: !17)
+!20 = !DILocation(line: 10, scope: !21)
+!21 = !DILexicalBlockFile(discriminator: 4, file: !1, scope: !17)
+!22 = !DILocation(line: 10, scope: !23)
+!23 = !DILexicalBlockFile(discriminator: 6, file: !1, scope: !17)
+!24 = !DILocation(line: 11, scope: !7)
+!25 = !DILocation(line: 12, scope: !7)
+!26 = distinct !DISubprogram(name: "goo", line: 11, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, unit: !0, scopeLine: 3, file: !1, scope: !5, type: !6, retainedNodes: !2)
+!27 = !DILocation(line: 11, column: 20, scope: !7)
+