Index: clang/lib/CodeGen/CodeGenFunction.cpp =================================================================== --- clang/lib/CodeGen/CodeGenFunction.cpp +++ clang/lib/CodeGen/CodeGenFunction.cpp @@ -853,6 +853,9 @@ if (CGM.getCodeGenOpts().ProfileSampleAccurate) Fn->addFnAttr("profile-sample-accurate"); + if (!CGM.getCodeGenOpts().SampleProfileFile.empty()) + Fn->addFnAttr("use-sample-profile"); + if (D && D->hasAttr<CFICanonicalJumpTableAttr>()) Fn->addFnAttr("cfi-canonical-jump-table"); Index: clang/test/CodeGen/use-sample-profile-attr.c =================================================================== --- /dev/null +++ clang/test/CodeGen/use-sample-profile-attr.c @@ -0,0 +1,20 @@ +// Test use-sample-profile attribute is present only when SampleFDO +// is enabled. +// +// RUN: %clang_cc1 -O2 -fno-experimental-new-pass-manager \ +// RUN: -fprofile-sample-use=%S/Inputs/pgo-sample.prof %s -emit-llvm -o - \ +// RUN: 2>&1 | FileCheck %s +// RUN: %clang_cc1 -O2 -fexperimental-new-pass-manager \ +// RUN: -fprofile-sample-use=%S/Inputs/pgo-sample.prof %s -emit-llvm -o - \ +// RUN: 2>&1 | FileCheck %s +// RUN: %clang_cc1 -O2 -fno-experimental-new-pass-manager %s \ +// RUN: -emit-llvm -o - 2>&1 | FileCheck %s --check-prefix=NOATTR +// RUN: %clang_cc1 -O2 -fexperimental-new-pass-manager %s -emit-llvm -o - \ +// RUN: 2>&1 | FileCheck %s --check-prefix=NOATTR + +// CHECK: define{{.*}} @func{{.*}} #[[ATTRID:[0-9]+]] +// CHECK: attributes #[[ATTRID]] = {{.*}} "use-sample-profile" +// NOATTR: define{{.*}} @func{{.*}} #[[ATTRID:[0-9]+]] +// NOATTR-NOT: attributes #[[ATTRID]] = {{.*}} "use-sample-profile" + +int func(int a) { return a; } Index: llvm/include/llvm/IR/Attributes.td =================================================================== --- llvm/include/llvm/IR/Attributes.td +++ llvm/include/llvm/IR/Attributes.td @@ -234,6 +234,7 @@ def NoJumpTables : StrBoolAttr<"no-jump-tables">; def NoInlineLineTables : StrBoolAttr<"no-inline-line-tables">; def ProfileSampleAccurate : 
StrBoolAttr<"profile-sample-accurate">; +def UseSampleProfile : StrBoolAttr<"use-sample-profile">; class CompatRule { // The name of the function called to check the attribute of the caller and @@ -252,6 +253,7 @@ def : CompatRule<"isEqual">; def : CompatRule<"isEqual">; def : CompatRule<"isEqual">; +def : CompatRule<"isEqual<UseSampleProfileAttr>">; class MergeRule { // The name of the function called to merge the attributes of the caller and Index: llvm/lib/Transforms/IPO/SampleProfile.cpp =================================================================== --- llvm/lib/Transforms/IPO/SampleProfile.cpp +++ llvm/lib/Transforms/IPO/SampleProfile.cpp @@ -1042,6 +1042,7 @@ if (R != SymbolMap.end() && R->getValue() && !R->getValue()->isDeclaration() && R->getValue()->getSubprogram() && + R->getValue()->hasFnAttribute("use-sample-profile") && isLegalToPromote(*I, R->getValue(), &Reason)) { uint64_t C = FS->getEntrySamples(); auto &DI = @@ -1785,7 +1786,7 @@ if (!ProfileTopDownLoad || CG == nullptr) { for (Function &F : M) - if (!F.isDeclaration()) + if (!F.isDeclaration() && F.hasFnAttribute("use-sample-profile")) FunctionOrderList.push_back(&F); return FunctionOrderList; } @@ -1795,7 +1796,7 @@ while (!CGI.isAtEnd()) { for (CallGraphNode *node : *CGI) { auto F = node->getFunction(); - if (F && !F->isDeclaration()) + if (F && !F->isDeclaration() && F->hasFnAttribute("use-sample-profile")) FunctionOrderList.push_back(F); } ++CGI; Index: llvm/test/LTO/Resolution/X86/load-sample-prof-icp.ll =================================================================== --- llvm/test/LTO/Resolution/X86/load-sample-prof-icp.ll +++ llvm/test/LTO/Resolution/X86/load-sample-prof-icp.ll @@ -17,7 +17,7 @@ ; CHECK-LABEL: @test ; Checks that the call instruction is promoted to direct call and has ; profile count annotated on the direct call. 
-define void @test(void ()*) !dbg !7 { +define void @test(void ()*) #0 !dbg !7 { %2 = alloca void ()* store void ()* %0, void ()** %2 %3 = load void ()*, void ()** %2 @@ -28,6 +28,8 @@ declare void @bar() local_unnamed_addr +attributes #0 = {"use-sample-profile"} + !llvm.dbg.cu = !{!0} !llvm.module.flags = !{!3, !4, !5} !llvm.ident = !{!6} Index: llvm/test/LTO/Resolution/X86/load-sample-prof-lto.ll =================================================================== --- llvm/test/LTO/Resolution/X86/load-sample-prof-lto.ll +++ llvm/test/LTO/Resolution/X86/load-sample-prof-lto.ll @@ -20,7 +20,7 @@ target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" -define void @foo() local_unnamed_addr !dbg !7 { +define void @foo() #0 !dbg !7 { entry: tail call void @bar(), !dbg !10 ret void, !dbg !11 @@ -28,6 +28,8 @@ declare void @bar() local_unnamed_addr +attributes #0 = {"local_unnamed_addr" "use-sample-profile"} + !llvm.dbg.cu = !{!0} !llvm.module.flags = !{!3, !4, !5} !llvm.ident = !{!6} Index: llvm/test/LTO/Resolution/X86/load-sample-prof.ll =================================================================== --- llvm/test/LTO/Resolution/X86/load-sample-prof.ll +++ llvm/test/LTO/Resolution/X86/load-sample-prof.ll @@ -11,7 +11,7 @@ target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" -define void @foo() local_unnamed_addr !dbg !7 { +define void @foo() #0 !dbg !7 { entry: tail call void @bar(), !dbg !10 ret void, !dbg !11 @@ -19,6 +19,8 @@ declare void @bar() local_unnamed_addr +attributes #0 = {"local_unnamed_addr" "use-sample-profile"} + !llvm.dbg.cu = !{!0} !llvm.module.flags = !{!3, !4, !5} !llvm.ident = !{!6} Index: llvm/test/Transforms/Inline/inline-incompat-attrs.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/Inline/inline-incompat-attrs.ll @@ 
-0,0 +1,50 @@ +; RUN: opt < %s -passes=inline -inline-threshold=100 -S | FileCheck %s + +;; caller1/caller2/callee1/callee2 test functions with incompatible attributes +;; won't be inlined into each other. + +define i32 @callee1(i32 %x) { + %x1 = add i32 %x, 1 + %x2 = add i32 %x1, 1 + %x3 = add i32 %x2, 1 + call void @extern() + ret i32 %x3 +} + +define i32 @callee2(i32 %x) #0 { + %x1 = add i32 %x, 1 + %x2 = add i32 %x1, 1 + %x3 = add i32 %x2, 1 + call void @extern() + ret i32 %x3 +} + +define i32 @caller1(i32 %y1) { +;; caller1 doesn't have use-sample-profile attribute but callee2 has, +;; so callee2 won't be inlined into caller1. +;; caller1 and callee1 don't have use-sample-profile attribute, so +;; callee1 can be inlined into caller1. +; CHECK-LABEL: @caller1( +; CHECK: call i32 @callee2 +; CHECK-NOT: call i32 @callee1 + %y2 = call i32 @callee2(i32 %y1) + %y3 = call i32 @callee1(i32 %y2) + ret i32 %y3 +} + +define i32 @caller2(i32 %y1) #0 { +;; caller2 and callee2 both have use-sample-profile attribute, so +;; callee2 can be inlined into caller2. +;; caller2 has use-sample-profile attribute but callee1 doesn't have, +;; so callee1 won't be inlined into caller2. 
+; CHECK-LABEL: @caller2( +; CHECK-NOT: call i32 @callee2 +; CHECK: call i32 @callee1 + %y2 = call i32 @callee2(i32 %y1) + %y3 = call i32 @callee1(i32 %y2) + ret i32 %y3 +} + +declare void @extern() + +attributes #0 = { "use-sample-profile" } Index: llvm/test/Transforms/Inline/partial-inline-incompat-attrs.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/Inline/partial-inline-incompat-attrs.ll @@ -0,0 +1,49 @@ +; RUN: opt < %s -passes=partial-inliner -S 2>&1| FileCheck %s + +define i32 @callee1(i32 %arg) { +bb: + %tmp = icmp slt i32 %arg, 0 + br i1 %tmp, label %bb1, label %bb2 + +bb1: + br i1 undef, label %bb4, label %bb2 + +bb2: + br i1 undef, label %bb4, label %bb5 + +bb4: + %xx1 = phi i32 [ 1, %bb1 ], [ 9, %bb2 ] + %xx2 = phi i32 [ 1, %bb1 ], [ 9, %bb2 ] + %xx3 = phi i32 [ 1, %bb1 ], [ 9, %bb2 ] + tail call void (...) @extern() #2 + br label %bb5 + +bb5: + %tmp6 = phi i32 [ 1, %bb2 ], [ 9, %bb4 ] + ret i32 %tmp6 +} + +declare void @extern(...) + +define i32 @caller1(i32 %arg) { +bb: +;; partial inliner inlines callee to caller. +; CHECK-LABEL: @caller1 +; CHECK: br i1 +; CHECK: br i1 +; CHECK-NOT: call i32 @callee1( + %tmp = tail call i32 @callee1(i32 %arg) + ret i32 %tmp +} + +define i32 @caller2(i32 %arg) #0 { +bb: +;; partial inliner won't inline callee to caller because they have +;; incompatible attributes. +; CHECK-LABEL: @caller2 +; CHECK: call i32 @callee1( + %tmp = tail call i32 @callee1(i32 %arg) + ret i32 %tmp +} + +attributes #0 = { "use-sample-profile" } Index: llvm/test/Transforms/SampleProfile/Inputs/profile-symbol-list.ll =================================================================== --- llvm/test/Transforms/SampleProfile/Inputs/profile-symbol-list.ll +++ llvm/test/Transforms/SampleProfile/Inputs/profile-symbol-list.ll @@ -90,10 +90,10 @@ ; Function Attrs: nofree nounwind declare dso_local i32 @printf(i8* nocapture readonly, ...) 
local_unnamed_addr #3 -attributes #0 = { noinline norecurse nounwind readnone uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #1 = { norecurse nounwind readnone uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #2 = { nofree norecurse nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #3 = { nofree nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } 
+attributes #0 = { noinline norecurse nounwind readnone uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" "use-sample-profile" } +attributes #1 = { norecurse nounwind readnone uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" "use-sample-profile" } +attributes #2 = { nofree norecurse nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" "use-sample-profile" } +attributes #3 = { nofree nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" 
"unsafe-fp-math"="false" "use-soft-float"="false" "use-sample-profile" } !llvm.dbg.cu = !{!0} !llvm.module.flags = !{!3, !4, !5} Index: llvm/test/Transforms/SampleProfile/Inputs/use-sample-profile-attr.prof =================================================================== --- /dev/null +++ llvm/test/Transforms/SampleProfile/Inputs/use-sample-profile-attr.prof @@ -0,0 +1,15 @@ +foo:5860:10 + 0: 1820 + 1: 1820 + 2: 1820 +goo:1820:20 + 0: 1820 +main:225715:1 + 2.1: 5553 + 3: 5391 + 3.1: foo:5860 + 0: 5279 + 1: 5279 + 2: 5279 + 4: goo:5860 + 1: 5860 Index: llvm/test/Transforms/SampleProfile/branch.ll =================================================================== --- llvm/test/Transforms/SampleProfile/branch.ll +++ llvm/test/Transforms/SampleProfile/branch.ll @@ -150,7 +150,7 @@ declare i32 @printf(i8*, ...) #3 -attributes #0 = { uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #0 = { uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" "use-sample-profile" } attributes #1 = { nounwind readnone } attributes #2 = { nounwind readonly "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" } attributes #3 = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" 
"target-features"="+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" } Index: llvm/test/Transforms/SampleProfile/calls.ll =================================================================== --- llvm/test/Transforms/SampleProfile/calls.ll +++ llvm/test/Transforms/SampleProfile/calls.ll @@ -20,7 +20,7 @@ @.str = private unnamed_addr constant [11 x i8] c"sum is %d\0A\00", align 1 ; Function Attrs: nounwind uwtable -define i32 @_Z3sumii(i32 %x, i32 %y) !dbg !4 { +define i32 @_Z3sumii(i32 %x, i32 %y) #0 !dbg !4 { entry: %x.addr = alloca i32, align 4 %y.addr = alloca i32, align 4 @@ -33,7 +33,7 @@ } ; Function Attrs: uwtable -define i32 @main() !dbg !7 { +define i32 @main() #0 !dbg !7 { entry: %retval = alloca i32, align 4 %s = alloca i32, align 4 @@ -85,6 +85,8 @@ declare i32 @printf(i8*, ...) #2 +attributes #0 = {"use-sample-profile"} + !llvm.dbg.cu = !{!0} !llvm.module.flags = !{!8, !9} !llvm.ident = !{!10} Index: llvm/test/Transforms/SampleProfile/cold-indirect-call.ll =================================================================== --- llvm/test/Transforms/SampleProfile/cold-indirect-call.ll +++ llvm/test/Transforms/SampleProfile/cold-indirect-call.ll @@ -1,21 +1,22 @@ ; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/cold-indirect-call.prof -S | FileCheck %s ; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/cold-indirect-call.prof -S | FileCheck %s -define i32 @foo(i32 ()* %func) !dbg !3 { +define i32 @foo(i32 ()* %func) #0 !dbg !3 { ; CHECK: icmp {{.*}} @bar ; CHECK-NOT: icmp {{.*}} @baz %call = call i32 %func(), !dbg !4 ret i32 %call } -define i32 @bar() !dbg !5 { +define i32 @bar() #0 !dbg !5 { ret i32 41, !dbg !6 } -define i32 @baz() !dbg !7 { +define i32 @baz() #0 !dbg !7 { ret i32 42, !dbg !8 } +attributes #0 = {"use-sample-profile"} !llvm.dbg.cu = !{!0} !llvm.module.flags = !{!2} Index: llvm/test/Transforms/SampleProfile/cov-zero-samples.ll =================================================================== --- 
llvm/test/Transforms/SampleProfile/cov-zero-samples.ll +++ llvm/test/Transforms/SampleProfile/cov-zero-samples.ll @@ -18,7 +18,7 @@ @N = global i64 8000000000, align 8, !dbg !0 @.str = private unnamed_addr constant [11 x i8] c"sum is %d\0A\00", align 1 -define i32 @_Z12never_calledi(i32 %i) !dbg !11 { +define i32 @_Z12never_calledi(i32 %i) #1 !dbg !11 { entry: ret i32 0, !dbg !15 } @@ -26,7 +26,7 @@ ; Function Attrs: nounwind readnone declare void @llvm.dbg.declare(metadata, metadata, metadata) #0 -define i32 @main() !dbg !17 { +define i32 @main() #1 !dbg !17 { entry: %retval = alloca i32, align 4 %sum = alloca i32, align 4 @@ -85,6 +85,7 @@ declare i32 @printf(i8*, ...) attributes #0 = { nounwind readnone } +attributes #1 = {"use-sample-profile"} !llvm.dbg.cu = !{!2} !llvm.module.flags = !{!8, !9} Index: llvm/test/Transforms/SampleProfile/coverage-warning.ll =================================================================== --- llvm/test/Transforms/SampleProfile/coverage-warning.ll +++ llvm/test/Transforms/SampleProfile/coverage-warning.ll @@ -1,6 +1,6 @@ ; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/coverage-warning.prof -sample-profile-check-record-coverage=90 -sample-profile-check-sample-coverage=100 -o /dev/null 2>&1 | FileCheck %s ; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/coverage-warning.prof -sample-profile-check-record-coverage=90 -sample-profile-check-sample-coverage=100 -o /dev/null 2>&1 | FileCheck %s -define i32 @foo(i32 %i) !dbg !4 { +define i32 @foo(i32 %i) #0 !dbg !4 { ; The profile has samples for line locations that are no longer present. 
; Coverage does not reach 90%, so we should get this warning: ; @@ -27,6 +27,8 @@ ret i32 %1, !dbg !13 } +attributes #0 = {"use-sample-profile"} + !llvm.dbg.cu = !{!0} !llvm.module.flags = !{!6, !7} !llvm.ident = !{!8} Index: llvm/test/Transforms/SampleProfile/discriminator.ll =================================================================== --- llvm/test/Transforms/SampleProfile/discriminator.ll +++ llvm/test/Transforms/SampleProfile/discriminator.ll @@ -62,6 +62,7 @@ ret i32 %4, !dbg !21 } +attributes #0 = {"use-sample-profile"} !llvm.dbg.cu = !{!0} !llvm.module.flags = !{!7, !8} Index: llvm/test/Transforms/SampleProfile/early-inline.ll =================================================================== --- llvm/test/Transforms/SampleProfile/early-inline.ll +++ llvm/test/Transforms/SampleProfile/early-inline.ll @@ -54,8 +54,8 @@ declare i32 @__gxx_personality_v0(...) -attributes #0 = {"target-features"="+sse4.1"} -attributes #1 = {"target-features"="+sse4.2"} +attributes #0 = {"target-features"="+sse4.1" "use-sample-profile"} +attributes #1 = {"target-features"="+sse4.2" "use-sample-profile"} !llvm.dbg.cu = !{!0} !llvm.module.flags = !{!3, !4} Index: llvm/test/Transforms/SampleProfile/entry_counts.ll =================================================================== --- llvm/test/Transforms/SampleProfile/entry_counts.ll +++ llvm/test/Transforms/SampleProfile/entry_counts.ll @@ -4,18 +4,20 @@ ; According to the profile, function empty() was called 13,293 times. 
; CHECK: {{.*}} = !{!"function_entry_count", i64 13294} -define void @empty() !dbg !4 { +define void @empty() #0 !dbg !4 { entry: ret void, !dbg !9 } ; This function does not have profile, check if function_entry_count is -1 ; CHECK: {{.*}} = !{!"function_entry_count", i64 -1} -define void @no_profile() { +define void @no_profile() #0 { entry: ret void } +attributes #0 = {"use-sample-profile"} + !llvm.dbg.cu = !{!0} !llvm.module.flags = !{!6, !7} !llvm.ident = !{!8} Index: llvm/test/Transforms/SampleProfile/entry_counts_cold.ll =================================================================== --- llvm/test/Transforms/SampleProfile/entry_counts_cold.ll +++ llvm/test/Transforms/SampleProfile/entry_counts_cold.ll @@ -95,7 +95,7 @@ declare void @baz(...) #3 -attributes #0 = { nounwind ssp uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "frame-pointer"="all" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #0 = { nounwind ssp uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "frame-pointer"="all" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" "use-sample-profile" } attributes #1 = { nounwind readnone speculatable } attributes #2 = { argmemonly nounwind } attributes #3 = { "correctly-rounded-divide-sqrt-fp-math"="false" 
"disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } Index: llvm/test/Transforms/SampleProfile/entry_counts_missing_dbginfo.ll =================================================================== --- llvm/test/Transforms/SampleProfile/entry_counts_missing_dbginfo.ll +++ llvm/test/Transforms/SampleProfile/entry_counts_missing_dbginfo.ll @@ -105,7 +105,7 @@ declare void @baz(...) #3 -attributes #0 = { nounwind ssp uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "frame-pointer"="all" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #0 = { nounwind ssp uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "frame-pointer"="all" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" "use-sample-profile" } attributes #1 = { nounwind readnone speculatable } attributes #2 = { argmemonly nounwind } attributes #3 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" 
"frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } Index: llvm/test/Transforms/SampleProfile/fnptr.ll =================================================================== --- llvm/test/Transforms/SampleProfile/fnptr.ll +++ llvm/test/Transforms/SampleProfile/fnptr.ll @@ -127,6 +127,9 @@ ; Function Attrs: nounwind declare i32 @printf(i8* nocapture readonly, ...) #1 +attributes #0 = {"use-sample-profile"} +attributes #2 = {"use-sample-profile"} + !llvm.module.flags = !{!0} !llvm.ident = !{!1} !llvm.dbg.cu = !{!26} Index: llvm/test/Transforms/SampleProfile/function_metadata.ll =================================================================== --- llvm/test/Transforms/SampleProfile/function_metadata.ll +++ llvm/test/Transforms/SampleProfile/function_metadata.ll @@ -10,12 +10,12 @@ declare !dbg !13 void @bar_dbg() -define void @bar_available() !dbg !14 { +define void @bar_available() #0 !dbg !14 { ret void } ; CHECK: define void @test({{.*}} !prof ![[ENTRY_TEST:[0-9]+]] -define void @test(void ()*) !dbg !7 { +define void @test(void ()*) #0 !dbg !7 { %2 = alloca void ()* store void ()* %0, void ()** %2 %3 = load void ()*, void ()** %2 @@ -26,7 +26,7 @@ } ; CHECK: define void @test_liveness({{.*}} !prof ![[ENTRY_TEST_LIVENESS:[0-9]+]] -define void @test_liveness() !dbg !12 { +define void @test_liveness() #0 !dbg !12 { call void @foo(), !dbg !20 ret void } @@ -42,6 +42,8 @@ ; to bar. bar_available should not be included as it's within the same module. 
; CHECK: ![[ENTRY_TEST_LIVENESS]] = !{!"function_entry_count", i64 1, i64 6699318081062747564, i64 -2012135647395072713, i64 -1522495160813492905} +attributes #0 = {"use-sample-profile"} + !llvm.dbg.cu = !{!0} !llvm.module.flags = !{!8, !9} !llvm.ident = !{!10} Index: llvm/test/Transforms/SampleProfile/gcc-simple.ll =================================================================== --- llvm/test/Transforms/SampleProfile/gcc-simple.ll +++ llvm/test/Transforms/SampleProfile/gcc-simple.ll @@ -136,7 +136,7 @@ ; CHECK ![[PROF3]] = !{!"branch_weights", i32 1, i32 1} ; CHECK ![[PROF4]] = !{!"branch_weights", i32 1, i32 20238} -attributes #0 = { nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #0 = { nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" "use-sample-profile" } attributes #1 = { nounwind readnone } attributes #2 = { nounwind "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" } attributes #3 = { nounwind } Index: llvm/test/Transforms/SampleProfile/indirect-call-gcc.ll =================================================================== --- llvm/test/Transforms/SampleProfile/indirect-call-gcc.ll +++ llvm/test/Transforms/SampleProfile/indirect-call-gcc.ll @@ -6,7 +6,7 @@ ; not work. 
; XFAIL: host-byteorder-big-endian -define void @test(void ()*) !dbg !3 { +define void @test(void ()*) #0 !dbg !3 { %2 = alloca void ()* store void ()* %0, void ()** %2 %3 = load void ()*, void ()** %2 @@ -15,6 +15,8 @@ ret void } +attributes #0 = {"use-sample-profile"} + !llvm.dbg.cu = !{!0} !llvm.module.flags = !{!2} Index: llvm/test/Transforms/SampleProfile/indirect-call.ll =================================================================== --- llvm/test/Transforms/SampleProfile/indirect-call.ll +++ llvm/test/Transforms/SampleProfile/indirect-call.ll @@ -2,7 +2,7 @@ ; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/indirect-call.compact.afdo -S | FileCheck %s ; CHECK-LABEL: @test -define void @test(void ()*) !dbg !3 { +define void @test(void ()*) #0 !dbg !3 { %2 = alloca void ()* store void ()* %0, void ()** %2 %3 = load void ()*, void ()** %2 @@ -13,7 +13,7 @@ ; CHECK-LABEL: @test_inline ; If the indirect call is promoted and inlined in profile, we should promote and inline it. -define void @test_inline(i64* (i32*)*, i32* %x) !dbg !6 { +define void @test_inline(i64* (i32*)*, i32* %x) #0 !dbg !6 { %2 = alloca i64* (i32*)* store i64* (i32*)* %0, i64* (i32*)** %2 %3 = load i64* (i32*)*, i64* (i32*)** %2 @@ -35,7 +35,7 @@ ; CHECK-LABEL: @test_inline_strip ; If the indirect call is promoted and inlined in profile, and the callee name ; is stripped we should promote and inline it. -define void @test_inline_strip(i64* (i32*)*, i32* %x) !dbg !8 { +define void @test_inline_strip(i64* (i32*)*, i32* %x) #0 !dbg !8 { %2 = alloca i64* (i32*)* store i64* (i32*)* %0, i64* (i32*)** %2 %3 = load i64* (i32*)*, i64* (i32*)** %2 @@ -51,7 +51,7 @@ ; CHECK-LABEL: @test_inline_strip_conflict ; If the indirect call is promoted and inlined in profile, and the callee name ; is stripped, but have more than 1 potential match, we should not promote. 
-define void @test_inline_strip_conflict(i64* (i32*)*, i32* %x) !dbg !10 { +define void @test_inline_strip_conflict(i64* (i32*)*, i32* %x) #0 !dbg !10 { %2 = alloca i64* (i32*)* store i64* (i32*)* %0, i64* (i32*)** %2 %3 = load i64* (i32*)*, i64* (i32*)** %2 @@ -62,7 +62,7 @@ ; CHECK-LABEL: @test_noinline ; If the indirect call target is not available, we should not promote it. -define void @test_noinline(void ()*) !dbg !12 { +define void @test_noinline(void ()*) #0 !dbg !12 { %2 = alloca void ()* store void ()* %0, void ()** %2 %3 = load void ()*, void ()** %2 @@ -75,7 +75,7 @@ ; CHECK-LABEL: @test_noinline_bitcast ; If the indirect call has been promoted to a direct call with bitcast, ; do not inline it. -define float @test_noinline_bitcast(float ()*) !dbg !26 { +define float @test_noinline_bitcast(float ()*) #0 !dbg !26 { %2 = alloca float ()* store float ()* %0, float ()** %2 ; CHECK: icmp @@ -87,7 +87,7 @@ ; CHECK-LABEL: @test_norecursive_inline ; If the indirect call target is the caller, we should not promote it. -define void @test_norecursive_inline() !dbg !24 { +define void @test_norecursive_inline() #0 !dbg !24 { ; CHECK-NOT: icmp ; CHECK: call %1 = load void ()*, void ()** @y, align 8 @@ -95,7 +95,7 @@ ret void } -define i32* @return_arg(i32* readnone returned) !dbg !29{ +define i32* @return_arg(i32* readnone returned) #0 !dbg !29{ ret i32* %0 } @@ -103,7 +103,7 @@ ; When the promoted indirect call returns a parameter that was defined by the ; return value of a previous direct call. Checks both direct call and promoted ; indirect call are inlined. -define i32* @return_arg_caller(i32* (i32*)* nocapture) !dbg !30{ +define i32* @return_arg_caller(i32* (i32*)* nocapture) #0 !dbg !30{ ; CHECK-NOT: call i32* @foo_inline1 ; CHECK: if.true.direct_targ: ; CHECK-NOT: call @@ -124,7 +124,7 @@ ; CHECK-LABEL: @branch_prof_valid ; Check the conditional branch generated by indirect call promotion won't ; have invalid profile like !{!"branch_weights", i32 0, i32 0}. 
-define void @branch_prof_valid(void ()* %t0) !dbg !33 { +define void @branch_prof_valid(void ()* %t0) #0 !dbg !33 { %t1 = alloca void ()* store void ()* %t0, void ()** %t1 %t2 = load void ()*, void ()** %t1 @@ -137,49 +137,49 @@ @x = global i32 0, align 4 @y = global void ()* null, align 8 -define i32* @foo_inline1(i32* %x) !dbg !14 { +define i32* @foo_inline1(i32* %x) #0 !dbg !14 { ret i32* %x } -define i32* @foo_inline_strip.suffix(i32* %x) !dbg !15 { +define i32* @foo_inline_strip.suffix(i32* %x) #0 !dbg !15 { ret i32* %x } -define i32* @foo_inline_strip_conflict.suffix1(i32* %x) !dbg !16 { +define i32* @foo_inline_strip_conflict.suffix1(i32* %x) #0 !dbg !16 { ret i32* %x } -define i32* @foo_inline_strip_conflict.suffix2(i32* %x) !dbg !17 { +define i32* @foo_inline_strip_conflict.suffix2(i32* %x) #0 !dbg !17 { ret i32* %x } -define i32* @foo_inline_strip_conflict.suffix3(i32* %x) !dbg !18 { +define i32* @foo_inline_strip_conflict.suffix3(i32* %x) #0 !dbg !18 { ret i32* %x } -define i32* @foo_inline2(i32* %x) !dbg !19 { +define i32* @foo_inline2(i32* %x) #0 !dbg !19 { ret i32* %x } -define void @foo_inline3() !dbg !35 { +define void @foo_inline3() #0 !dbg !35 { ret void } -define i32 @foo_noinline(i32 %x) !dbg !20 { +define i32 @foo_noinline(i32 %x) #0 !dbg !20 { ret i32 %x } -define void @foo_direct() !dbg !21 { +define void @foo_direct() #0 !dbg !21 { ret void } -define i32 @foo_direct_i32() !dbg !28 { +define i32 @foo_direct_i32() #0 !dbg !28 { ret i32 0; } ; CHECK-LABEL: @test_direct ; We should not promote a direct call. 
-define void @test_direct() !dbg !22 { +define void @test_direct() #0 !dbg !22 { ; CHECK-NOT: icmp ; CHECK: call call void @foo_alias(), !dbg !23 @@ -188,6 +188,8 @@ @foo_alias = alias void (), void ()* @foo_direct +attributes #0 = {"use-sample-profile"} + !llvm.dbg.cu = !{!0} !llvm.module.flags = !{!2} Index: llvm/test/Transforms/SampleProfile/inline-callee-update.ll =================================================================== --- llvm/test/Transforms/SampleProfile/inline-callee-update.ll +++ llvm/test/Transforms/SampleProfile/inline-callee-update.ll @@ -6,7 +6,7 @@ @z = global i32* ()* null, align 8 ; CHECK: define i32* @sample_loader_inlinee() {{.*}} !prof ![[ENTRY:[0-9]+]] -define i32* @sample_loader_inlinee() !dbg !3 { +define i32* @sample_loader_inlinee() #0 !dbg !3 { bb: %tmp = call i32* @direct_leaf_func(i32* null), !dbg !4 %cmp = icmp ne i32* %tmp, null @@ -22,7 +22,7 @@ } ; CHECK: define i32* @cgscc_inlinee() {{.*}} !prof ![[ENTRY:[0-9]+]] -define i32* @cgscc_inlinee() !dbg !6 { +define i32* @cgscc_inlinee() #0 !dbg !6 { bb: %tmp = call i32* @direct_leaf_func(i32* null), !dbg !7 %cmp = icmp ne i32* %tmp, null @@ -37,13 +37,13 @@ ret i32* null } -define i32* @test_sample_loader_inline(void ()* %arg) !dbg !9 { +define i32* @test_sample_loader_inline(void ()* %arg) #0 !dbg !9 { bb: %tmp = call i32* @sample_loader_inlinee(), !dbg !10 ret i32* %tmp } -define i32* @test_cgscc_inline(void ()* %arg) !dbg !11 { +define i32* @test_cgscc_inline(void ()* %arg) #0 !dbg !11 { bb: %tmp = call i32* @cgscc_inlinee(), !dbg !12 ret i32* %tmp @@ -51,6 +51,8 @@ declare i32* @direct_leaf_func(i32*) +attributes #0 = {"use-sample-profile"} + !llvm.dbg.cu = !{!0} !llvm.module.flags = !{!2} Index: llvm/test/Transforms/SampleProfile/inline-cold-callsite-samplepgo.ll =================================================================== --- llvm/test/Transforms/SampleProfile/inline-cold-callsite-samplepgo.ll +++ 
llvm/test/Transforms/SampleProfile/inline-cold-callsite-samplepgo.ll @@ -5,12 +5,12 @@ ; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/inline.prof -profile-sample-accurate -inline -S -inline-cold-callsite-threshold=0 | FileCheck %s --check-prefix ACCURATE declare void @extern() -define void @callee() { +define void @callee() #1 { call void @extern() ret void } -define void @caller(i32 %y1) { +define void @caller(i32 %y1) #1 { ; CHECK-LABEL: @caller ; CHECK-NOT: call void @callee ; ACCURATE-LABEL: @caller @@ -28,4 +28,5 @@ ret void } -attributes #0 = { "profile-sample-accurate" } +attributes #0 = { "profile-sample-accurate" "use-sample-profile" } +attributes #1 = { "use-sample-profile" } Index: llvm/test/Transforms/SampleProfile/inline-cold.ll =================================================================== --- llvm/test/Transforms/SampleProfile/inline-cold.ll +++ llvm/test/Transforms/SampleProfile/inline-cold.ll @@ -15,7 +15,7 @@ @.str = private unnamed_addr constant [11 x i8] c"sum is %d\0A\00", align 1 -define i32 @_Z3sumii(i32 %x, i32 %y) !dbg !6 { +define i32 @_Z3sumii(i32 %x, i32 %y) #0 !dbg !6 { entry: %x.addr = alloca i32, align 4 %y.addr = alloca i32, align 4 @@ -27,7 +27,7 @@ ret i32 %add, !dbg !8 } -define i32 @main() !dbg !9 { +define i32 @main() #0 !dbg !9 { entry: %retval = alloca i32, align 4 %s = alloca i32, align 4 @@ -70,6 +70,8 @@ ret i32 0, !dbg !23 } +attributes #0 = { "use-sample-profile" } + declare i32 @printf(i8*, ...) 
!llvm.dbg.cu = !{!0} Index: llvm/test/Transforms/SampleProfile/inline-combine.ll =================================================================== --- llvm/test/Transforms/SampleProfile/inline-combine.ll +++ llvm/test/Transforms/SampleProfile/inline-combine.ll @@ -16,7 +16,7 @@ $bar = comdat any -define void @foo(%"class.llvm::FoldingSetNodeID"* %this) comdat align 2 !dbg !3 { +define void @foo(%"class.llvm::FoldingSetNodeID"* %this) #0 comdat align 2 !dbg !3 { %1 = alloca %"class.llvm::FoldingSetNodeID"*, align 8 store %"class.llvm::FoldingSetNodeID"* %this, %"class.llvm::FoldingSetNodeID"** %1, align 8 %2 = load %"class.llvm::FoldingSetNodeID"*, %"class.llvm::FoldingSetNodeID"** %1, align 8 @@ -27,10 +27,12 @@ ret void } -define void @bar(%"class.llvm::SmallVectorImpl"* %this) comdat align 2 !dbg !8 { +define void @bar(%"class.llvm::SmallVectorImpl"* %this) #0 comdat align 2 !dbg !8 { ret void } +attributes #0 = { "use-sample-profile" } + !llvm.module.flags = !{!0, !1} !llvm.ident = !{!2} !llvm.dbg.cu = !{!9} Index: llvm/test/Transforms/SampleProfile/inline-coverage.ll =================================================================== --- llvm/test/Transforms/SampleProfile/inline-coverage.ll +++ llvm/test/Transforms/SampleProfile/inline-coverage.ll @@ -32,7 +32,7 @@ ; 110% coverage check. 
; CHECK: warning: coverage.cc:7: 78834 of 78834 available profile samples (100%) were applied -define i64 @_Z3fool(i64 %i) !dbg !4 { +define i64 @_Z3fool(i64 %i) #0 !dbg !4 { entry: %i.addr = alloca i64, align 8 store i64 %i, i64* %i.addr, align 8 @@ -48,7 +48,7 @@ declare i32 @rand() -define i32 @main() !dbg !9 { +define i32 @main() #0 !dbg !9 { entry: %retval = alloca i32, align 4 %sum = alloca i64, align 8 @@ -87,6 +87,8 @@ ret i32 %cond, !dbg !42 } +attributes #0 = { "use-sample-profile" } + !llvm.dbg.cu = !{!0} !llvm.module.flags = !{!13, !14} !llvm.ident = !{!15} Index: llvm/test/Transforms/SampleProfile/inline-mergeprof.ll =================================================================== --- llvm/test/Transforms/SampleProfile/inline-mergeprof.ll +++ llvm/test/Transforms/SampleProfile/inline-mergeprof.ll @@ -8,7 +8,7 @@ @.str = private unnamed_addr constant [11 x i8] c"sum is %d\0A\00", align 1 -define i32 @main() !dbg !6 { +define i32 @main() #0 !dbg !6 { entry: %retval = alloca i32, align 4 %s = alloca i32, align 4 @@ -22,7 +22,7 @@ ret i32 0, !dbg !11 } -define i32 @_Z3sumii(i32 %x, i32 %y) !dbg !12 { +define i32 @_Z3sumii(i32 %x, i32 %y) #0 !dbg !12 { entry: %x.addr = alloca i32, align 4 %y.addr = alloca i32, align 4 @@ -44,7 +44,7 @@ ret i32 %add, !dbg !15 } -define i32 @_Z3subii(i32 %x, i32 %y) !dbg !16 { +define i32 @_Z3subii(i32 %x, i32 %y) #0 !dbg !16 { entry: %x.addr = alloca i32, align 4 %y.addr = alloca i32, align 4 @@ -56,6 +56,8 @@ ret i32 %add, !dbg !18 } +attributes #0 = { "use-sample-profile" } + declare i32 @printf(i8*, ...) 
!llvm.dbg.cu = !{!0} @@ -94,4 +96,4 @@ ; MERGE: !{!"branch_weights", i32 11, i32 23} ; MERGE: !{!"branch_weights", i32 10} ; MERGE: name: "sub" -; MERGE-NEXT: {!"function_entry_count", i64 3} \ No newline at end of file +; MERGE-NEXT: {!"function_entry_count", i64 3} Index: llvm/test/Transforms/SampleProfile/inline-stats.ll =================================================================== --- llvm/test/Transforms/SampleProfile/inline-stats.ll +++ llvm/test/Transforms/SampleProfile/inline-stats.ll @@ -19,7 +19,7 @@ ; } ; @.str = private unnamed_addr constant [11 x i8] c"sum is %d\0A\00", align 1 -define i32 @_Z3sumii(i32 %x, i32 %y) !dbg !6 { +define i32 @_Z3sumii(i32 %x, i32 %y) #0 !dbg !6 { entry: %x.addr = alloca i32, align 4 %y.addr = alloca i32, align 4 @@ -30,7 +30,7 @@ %add = add nsw i32 %tmp, %tmp1, !dbg !8 ret i32 %add, !dbg !8 } -define i32 @main() !dbg !9 { +define i32 @main() #0 !dbg !9 { entry: %retval = alloca i32, align 4 %s = alloca i32, align 4 @@ -72,6 +72,8 @@ } declare i32 @printf(i8*, ...) 
+attributes #0 = { "use-sample-profile" } + !llvm.dbg.cu = !{!0} !llvm.module.flags = !{!3, !4} !llvm.ident = !{!5} @@ -101,4 +103,4 @@ !22 = !DILocation(line: 11, scope: !9) !23 = !DILocation(line: 12, scope: !9) -; CHECK: 1 sample-profile - Number of functions inlined with context sensitive profile \ No newline at end of file +; CHECK: 1 sample-profile - Number of functions inlined with context sensitive profile Index: llvm/test/Transforms/SampleProfile/inline-topdown.ll =================================================================== --- llvm/test/Transforms/SampleProfile/inline-topdown.ll +++ llvm/test/Transforms/SampleProfile/inline-topdown.ll @@ -9,7 +9,7 @@ @.str = private unnamed_addr constant [11 x i8] c"sum is %d\0A\00", align 1 -define i32 @_Z3sumii(i32 %x, i32 %y) !dbg !6 { +define i32 @_Z3sumii(i32 %x, i32 %y) #0 !dbg !6 { entry: %x.addr = alloca i32, align 4 %y.addr = alloca i32, align 4 @@ -24,7 +24,7 @@ ret i32 %add, !dbg !8 } -define i32 @_Z3subii(i32 %x, i32 %y) !dbg !9 { +define i32 @_Z3subii(i32 %x, i32 %y) #0 !dbg !9 { entry: %x.addr = alloca i32, align 4 %y.addr = alloca i32, align 4 @@ -36,7 +36,7 @@ ret i32 %add, !dbg !11 } -define i32 @main() !dbg !12 { +define i32 @main() #0 !dbg !12 { entry: %retval = alloca i32, align 4 %s = alloca i32, align 4 @@ -79,6 +79,8 @@ declare i32 @printf(i8*, ...) 
+attributes #0 = { "use-sample-profile" } + !llvm.dbg.cu = !{!0} !llvm.module.flags = !{!3, !4} !llvm.ident = !{!5} @@ -120,4 +122,4 @@ ; TOPDOWN: @_Z3sumii ; TOPDOWN-NOT: call i32 @_Z3subii ; TOPDOWN: @main() -; TOPDOWN: call i32 @_Z3subii \ No newline at end of file +; TOPDOWN: call i32 @_Z3subii Index: llvm/test/Transforms/SampleProfile/inline.ll =================================================================== --- llvm/test/Transforms/SampleProfile/inline.ll +++ llvm/test/Transforms/SampleProfile/inline.ll @@ -20,7 +20,7 @@ @.str = private unnamed_addr constant [11 x i8] c"sum is %d\0A\00", align 1 ; Function Attrs: nounwind uwtable -define i32 @_Z3sumii(i32 %x, i32 %y) !dbg !4 { +define i32 @_Z3sumii(i32 %x, i32 %y) #0 !dbg !4 { entry: %x.addr = alloca i32, align 4 %y.addr = alloca i32, align 4 @@ -33,7 +33,7 @@ } ; Function Attrs: uwtable -define i32 @main() !dbg !7 { +define i32 @main() #0 !dbg !7 { entry: %retval = alloca i32, align 4 %s = alloca i32, align 4 @@ -78,6 +78,8 @@ declare i32 @printf(i8*, ...) 
#2 +attributes #0 = { "use-sample-profile" } + !llvm.dbg.cu = !{!0} !llvm.module.flags = !{!8, !9} !llvm.ident = !{!10} Index: llvm/test/Transforms/SampleProfile/nolocinfo.ll =================================================================== --- llvm/test/Transforms/SampleProfile/nolocinfo.ll +++ llvm/test/Transforms/SampleProfile/nolocinfo.ll @@ -1,7 +1,7 @@ ; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/nolocinfo.prof -S -pass-remarks=sample-profile 2>&1 | FileCheck %s ; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/nolocinfo.prof -S -pass-remarks=sample-profile 2>&1 | FileCheck %s -define i32 @foo(i32 %i) !dbg !4 { +define i32 @foo(i32 %i) #0 !dbg !4 { entry: %i.addr = alloca i32, align 4 %0 = load i32, i32* %i.addr, align 4 @@ -20,6 +20,8 @@ ret i32 1 } +attributes #0 = { "use-sample-profile" } + !llvm.dbg.cu = !{!0} !llvm.module.flags = !{!8, !9} !llvm.ident = !{!10} Index: llvm/test/Transforms/SampleProfile/offset.ll =================================================================== --- llvm/test/Transforms/SampleProfile/offset.ll +++ llvm/test/Transforms/SampleProfile/offset.ll @@ -48,7 +48,7 @@ ; Function Attrs: nounwind readnone declare void @llvm.dbg.declare(metadata, metadata, metadata) #1 -attributes #0 = { nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #0 = { nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" "use-sample-profile" } attributes #1 = { nounwind readnone } !llvm.dbg.cu = !{!0} Index: 
llvm/test/Transforms/SampleProfile/profile-format-compress.ll =================================================================== --- llvm/test/Transforms/SampleProfile/profile-format-compress.ll +++ llvm/test/Transforms/SampleProfile/profile-format-compress.ll @@ -35,7 +35,7 @@ ; CHECK: ![[IDX3]] = !{!"branch_weights", i32 1} ; Function Attrs: nounwind uwtable -define i32 @_Z3sumii(i32 %x, i32 %y) !dbg !4 { +define i32 @_Z3sumii(i32 %x, i32 %y) #0 !dbg !4 { entry: %x.addr = alloca i32, align 4 %y.addr = alloca i32, align 4 @@ -48,7 +48,7 @@ } ; Function Attrs: uwtable -define i32 @main() !dbg !7 { +define i32 @main() #0 !dbg !7 { entry: %retval = alloca i32, align 4 %s = alloca i32, align 4 @@ -92,6 +92,8 @@ declare i32 @printf(i8*, ...) #2 +attributes #0 = { "use-sample-profile" } + !llvm.dbg.cu = !{!0} !llvm.module.flags = !{!8, !9} !llvm.ident = !{!10} Index: llvm/test/Transforms/SampleProfile/profile-format.ll =================================================================== --- llvm/test/Transforms/SampleProfile/profile-format.ll +++ llvm/test/Transforms/SampleProfile/profile-format.ll @@ -37,7 +37,7 @@ ; CHECK: ![[IDX3]] = !{!"branch_weights", i32 1} ; Function Attrs: nounwind uwtable -define i32 @_Z3sumii(i32 %x, i32 %y) !dbg !4 { +define i32 @_Z3sumii(i32 %x, i32 %y) #0 !dbg !4 { entry: %x.addr = alloca i32, align 4 %y.addr = alloca i32, align 4 @@ -50,7 +50,7 @@ } ; Function Attrs: uwtable -define i32 @main() !dbg !7 { +define i32 @main() #0 !dbg !7 { entry: %retval = alloca i32, align 4 %s = alloca i32, align 4 @@ -94,6 +94,8 @@ declare i32 @printf(i8*, ...) 
#2 +attributes #0 = { "use-sample-profile" } + !llvm.dbg.cu = !{!0} !llvm.module.flags = !{!8, !9} !llvm.ident = !{!10} Index: llvm/test/Transforms/SampleProfile/profile-sample-accurate.ll =================================================================== --- llvm/test/Transforms/SampleProfile/profile-sample-accurate.ll +++ llvm/test/Transforms/SampleProfile/profile-sample-accurate.ll @@ -48,7 +48,7 @@ ; PROFSYMLIST: define i32 @_Z3sumii{{.*}}!prof ![[UNKNOWN_ID:[0-9]+]] ; ; Function Attrs: nounwind uwtable -define i32 @_Z3sumii(i32 %x, i32 %y) !dbg !4 { +define i32 @_Z3sumii(i32 %x, i32 %y) #0 !dbg !4 { entry: %x.addr = alloca i32, align 4 %y.addr = alloca i32, align 4 @@ -61,7 +61,7 @@ } ; Function Attrs: uwtable -define i32 @main() !dbg !7 { +define i32 @main() #0 !dbg !7 { entry: %retval = alloca i32, align 4 %s = alloca i32, align 4 @@ -123,6 +123,8 @@ declare i32 @printf(i8*, ...) #2 +attributes #0 = { "use-sample-profile" } + !llvm.dbg.cu = !{!0} !llvm.module.flags = !{!8, !9} !llvm.ident = !{!10} Index: llvm/test/Transforms/SampleProfile/propagate.ll =================================================================== --- llvm/test/Transforms/SampleProfile/propagate.ll +++ llvm/test/Transforms/SampleProfile/propagate.ll @@ -201,9 +201,9 @@ declare i32 @printf(i8*, ...) 
#3 -attributes #0 = { nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #0 = { nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" "use-sample-profile" } attributes #1 = { nounwind readnone } -attributes #2 = { norecurse uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #2 = { norecurse uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" "use-sample-profile" } attributes #3 = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } !llvm.dbg.cu = !{!0} Index: llvm/test/Transforms/SampleProfile/remap.ll =================================================================== --- llvm/test/Transforms/SampleProfile/remap.ll +++ 
llvm/test/Transforms/SampleProfile/remap.ll @@ -8,7 +8,7 @@ declare i1 @foo() -define void @_ZN3foo3barERKN1M1XINS_6detail3quxEEE() !dbg !2 { +define void @_ZN3foo3barERKN1M1XINS_6detail3quxEEE() #0 !dbg !2 { ; CHECK: Printing analysis 'Branch Probability Analysis' for function '_ZN3foo3barERKN1M1XINS_6detail3quxEEE': entry: @@ -49,6 +49,8 @@ ret void } +attributes #0 = { "use-sample-profile" } + !llvm.dbg.cu = !{!0} !llvm.module.flags = !{!4, !5} Index: llvm/test/Transforms/SampleProfile/remarks.ll =================================================================== --- llvm/test/Transforms/SampleProfile/remarks.ll +++ llvm/test/Transforms/SampleProfile/remarks.ll @@ -155,7 +155,7 @@ ret i32 %conv, !dbg !58 } -attributes #0 = { nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #0 = { nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" "use-sample-profile" } attributes #1 = { nounwind argmemonly } attributes #2 = { nounwind readnone } attributes #3 = { nounwind "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" } Index: llvm/test/Transforms/SampleProfile/section-accurate-samplepgo.ll =================================================================== --- llvm/test/Transforms/SampleProfile/section-accurate-samplepgo.ll +++ 
llvm/test/Transforms/SampleProfile/section-accurate-samplepgo.ll @@ -17,7 +17,7 @@ ; ACCURATE: foo_not_in_profile{{.*}}!prof ![[ZERO_ID:[0-9]+]] !section_prefix ![[COLD_ID:[0-9]+]] ; The function not appearing in profile is cold when -profile-sample-accurate ; is on. -define void @foo_not_in_profile() { +define void @foo_not_in_profile() #1 { call void @hot_func() ret void } @@ -31,7 +31,8 @@ ret void } -attributes #0 = { "profile-sample-accurate" } +attributes #0 = { "profile-sample-accurate" "use-sample-profile" } +attributes #1 = { "use-sample-profile" } ; CHECK: ![[NOPROFILE_ID]] = !{!"function_entry_count", i64 -1} ; CHECK: ![[ZERO_ID]] = !{!"function_entry_count", i64 0} Index: llvm/test/Transforms/SampleProfile/syntax.ll =================================================================== --- llvm/test/Transforms/SampleProfile/syntax.ll +++ llvm/test/Transforms/SampleProfile/syntax.ll @@ -16,10 +16,13 @@ ; RUN: not opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/bad_samples.prof 2>&1 | FileCheck -check-prefix=BAD-SAMPLES %s ; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/bad_mangle.prof 2>&1 >/dev/null -define void @empty() { +define void @empty() #0 { entry: ret void } + +attributes #0 = { "use-sample-profile" } + ; NO-DEBUG: warning: No debug information found in function empty: Function profile not used ; MISSING-FILE: missing.prof: Could not open profile: ; BAD-FN-HEADER: error: {{.*}}bad_fn_header.prof: Could not open profile: Unrecognized sample profile encoding format Index: llvm/test/Transforms/SampleProfile/use-sample-profile-attr.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/SampleProfile/use-sample-profile-attr.ll @@ -0,0 +1,117 @@ +; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/use-sample-profile-attr.prof -S | FileCheck %s +; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/use-sample-profile-attr.prof -S | 
FileCheck %s + +@.str = private unnamed_addr constant [11 x i8] c"sum is %d\0A\00", align 1 + +;; @goo doesn't have use-sample-profile attribute so it won't have +;; profile annotated. +; CHECK-NOT: @goo{{.*}} !prof +define void @goo() !dbg !26 { + ret void +} + +;; @foo has use-sample-profile attribute so it will have profile annotated. +; CHECK: @foo{{.*}} !prof ![[HDRCNT1:[0-9]+]] +define i32 @foo(i32 %x, i32 %y) #0 !dbg !4 { +entry: + %x.addr = alloca i32, align 4 + %y.addr = alloca i32, align 4 + store i32 %x, i32* %x.addr, align 4 + store i32 %y, i32* %y.addr, align 4 + %0 = load i32, i32* %x.addr, align 4, !dbg !11 + %1 = load i32, i32* %y.addr, align 4, !dbg !11 + %add = add nsw i32 %0, %1, !dbg !11 + ret i32 %add, !dbg !11 +} + +;; @main has use-sample-profile attribute so it will have profile annotated. +; CHECK: @main{{.*}} !prof ![[HDRCNT2:[0-9]+]] +define i32 @main() #1 !dbg !7 { +entry: + %retval = alloca i32, align 4 + %s = alloca i32, align 4 + %i = alloca i32, align 4 + store i32 0, i32* %retval + store i32 0, i32* %i, align 4, !dbg !12 + br label %while.cond, !dbg !13 + +while.cond: ; preds = %if.end, %entry + %0 = load i32, i32* %i, align 4, !dbg !14 + %inc = add nsw i32 %0, 1, !dbg !14 + store i32 %inc, i32* %i, align 4, !dbg !14 + %cmp = icmp slt i32 %0, 400000000, !dbg !14 + br i1 %cmp, label %while.body, label %while.end, !dbg !14 + +while.body: ; preds = %while.cond + %1 = load i32, i32* %i, align 4, !dbg !16 + %cmp1 = icmp ne i32 %1, 100, !dbg !16 + br i1 %cmp1, label %if.then, label %if.else, !dbg !16 + + +if.then: ; preds = %while.body + %2 = load i32, i32* %i, align 4, !dbg !18 + %3 = load i32, i32* %s, align 4, !dbg !18 +;; @foo is inlined because the callsite is hot and @foo has use-sample-profile +;; attribute. 
+; CHECK: if.then: +; CHECK-NOT: call i32 @foo + %call = call i32 @foo(i32 %2, i32 %3), !dbg !18 + store i32 %call, i32* %s, align 4, !dbg !18 + br label %if.end, !dbg !18 + +if.else: ; preds = %while.body +;; @goo is not inlined because @goo doesn't have use-sample-profile attribute. +; CHECK: if.else: +; CHECK: call void @goo + call void @goo(), !dbg !27 + store i32 30, i32* %s, align 4, !dbg !20 + br label %if.end + +if.end: ; preds = %if.else, %if.then + br label %while.cond, !dbg !22 + +while.end: ; preds = %while.cond + %4 = load i32, i32* %s, align 4, !dbg !24 + %call2 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i32 0, i32 0), i32 %4), !dbg !24 + ret i32 0, !dbg !25 +} + +; CHECK: ![[HDRCNT1]] = !{!"function_entry_count", i64 11} +; CHECK: ![[HDRCNT2]] = !{!"function_entry_count", i64 2} + +attributes #0 = {"use-sample-profile"} +attributes #1 = {"use-sample-profile"} + +declare i32 @printf(i8*, ...) + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!8, !9} +!llvm.ident = !{!10} + +!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.5 ", isOptimized: false, emissionKind: NoDebug, file: !1, enums: !2, retainedTypes: !2, globals: !2, imports: !2) +!1 = !DIFile(filename: "calls.cc", directory: ".") +!2 = !{} +!4 = distinct !DISubprogram(name: "foo", line: 3, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, unit: !0, scopeLine: 3, file: !1, scope: !5, type: !6, retainedNodes: !2) +!5 = !DIFile(filename: "calls.cc", directory: ".") +!6 = !DISubroutineType(types: !2) +!7 = distinct !DISubprogram(name: "main", line: 7, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, unit: !0, scopeLine: 7, file: !1, scope: !5, type: !6, retainedNodes: !2) +!8 = !{i32 2, !"Dwarf Version", i32 4} +!9 = !{i32 1, !"Debug Info Version", i32 3} +!10 = !{!"clang version 3.5 "} +!11 = !DILocation(line: 4, scope: !4) 
+!12 = !DILocation(line: 8, scope: !7) +!13 = !DILocation(line: 9, scope: !7) +!14 = !DILocation(line: 9, scope: !15) +!15 = !DILexicalBlockFile(discriminator: 2, file: !1, scope: !7) +!16 = !DILocation(line: 10, scope: !17) +!17 = distinct !DILexicalBlock(line: 10, column: 0, file: !1, scope: !7) +!18 = !DILocation(line: 10, scope: !19) +!19 = !DILexicalBlockFile(discriminator: 2, file: !1, scope: !17) +!20 = !DILocation(line: 10, scope: !21) +!21 = !DILexicalBlockFile(discriminator: 4, file: !1, scope: !17) +!22 = !DILocation(line: 10, scope: !23) +!23 = !DILexicalBlockFile(discriminator: 6, file: !1, scope: !17) +!24 = !DILocation(line: 11, scope: !7) +!25 = !DILocation(line: 12, scope: !7) +!26 = distinct !DISubprogram(name: "goo", line: 11, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, unit: !0, scopeLine: 3, file: !1, scope: !5, type: !6, retainedNodes: !2) +!27 = !DILocation(line: 11, column: 20, scope: !7) Index: llvm/test/Transforms/SampleProfile/warm-inline-instance.ll =================================================================== --- llvm/test/Transforms/SampleProfile/warm-inline-instance.ll +++ llvm/test/Transforms/SampleProfile/warm-inline-instance.ll @@ -4,7 +4,7 @@ @.str = private unnamed_addr constant [11 x i8] c"sum is %d\0A\00", align 1 ; Function Attrs: nounwind uwtable -define i32 @foo(i32 %x, i32 %y) !dbg !4 { +define i32 @foo(i32 %x, i32 %y) #0 !dbg !4 { entry: %x.addr = alloca i32, align 4 %y.addr = alloca i32, align 4 @@ -16,7 +16,7 @@ ret i32 %add, !dbg !11 } -define i32 @goo(i32 %x, i32 %y) { +define i32 @goo(i32 %x, i32 %y) #0 { entry: %x.addr = alloca i32, align 4 %y.addr = alloca i32, align 4 @@ -29,7 +29,7 @@ } ; Function Attrs: uwtable -define i32 @main() !dbg !7 { +define i32 @main() #0 !dbg !7 { entry: %retval = alloca i32, align 4 %s = alloca i32, align 4 @@ -83,6 +83,8 @@ declare i32 @printf(i8*, ...) 
#2 +attributes #0 = { "use-sample-profile" } + !llvm.dbg.cu = !{!0} !llvm.module.flags = !{!8, !9} !llvm.ident = !{!10}