Index: lib/CodeGen/CodeGenPGO.cpp =================================================================== --- lib/CodeGen/CodeGenPGO.cpp +++ lib/CodeGen/CodeGenPGO.cpp @@ -47,6 +47,15 @@ llvm::createPGOFuncNameMetadata(*Fn, FuncName); } +/// The version of the PGO hash algorithm. +enum PGOHashVersion : unsigned { + PGO_HASH_V1, + PGO_HASH_V2, + + // Keep this set to the latest hash version. + PGO_HASH_LATEST = PGO_HASH_V2 +}; + namespace { /// \brief Stable hasher for PGO region counters. /// @@ -61,6 +70,7 @@ class PGOHash { uint64_t Working; unsigned Count; + PGOHashVersion HashVersion; llvm::MD5 MD5; static const int NumBitsPerType = 6; @@ -93,24 +103,53 @@ BinaryOperatorLAnd, BinaryOperatorLOr, BinaryConditionalOperator, + // The preceding values are available with PGO_HASH_V1. + + EndOfScope, + IfThenBranch, + IfElseBranch, + GotoStmt, + IndirectGotoStmt, + BreakStmt, + ContinueStmt, + ReturnStmt, + ThrowExpr, + UnaryOperatorLNot, + BinaryOperatorLT, + BinaryOperatorGT, + BinaryOperatorLE, + BinaryOperatorGE, + BinaryOperatorEQ, + BinaryOperatorNE, + // The preceding values are available with PGO_HASH_V2. // Keep this last. It's for the static assert that follows. LastHashType }; static_assert(LastHashType <= TooBig, "Too many types in HashType"); - // TODO: When this format changes, take in a version number here, and use the - // old hash calculation for file formats that used the old hash. - PGOHash() : Working(0), Count(0) {} + PGOHash(PGOHashVersion HashVersion) + : Working(0), Count(0), HashVersion(HashVersion), MD5() {} void combine(HashType Type); uint64_t finalize(); + PGOHashVersion getHashVersion() const { return HashVersion; } }; const int PGOHash::NumBitsPerType; const unsigned PGOHash::NumTypesPerWord; const unsigned PGOHash::TooBig; +/// Get the PGO hash version used in the given indexed profile. +static PGOHashVersion getPGOHashVersion(llvm::IndexedInstrProfReader *PGOReader, + CodeGenModule &CGM) { + if (PGOReader->getVersion() <= 4) + return PGO_HASH_V1; + return PGO_HASH_V2; +} + /// A RecursiveASTVisitor that fills a map of statements to PGO counters. struct MapRegionCounters : public RecursiveASTVisitor { + using Base = RecursiveASTVisitor; + /// The next counter value to assign. unsigned NextCounter; /// The function hash. @@ -118,8 +157,9 @@ /// The map of statements to counters. llvm::DenseMap &CounterMap; - MapRegionCounters(llvm::DenseMap &CounterMap) - : NextCounter(0), CounterMap(CounterMap) {} + MapRegionCounters(PGOHashVersion HashVersion, + llvm::DenseMap &CounterMap) + : NextCounter(0), Hash(HashVersion), CounterMap(CounterMap) {} // Blocks and lambdas are handled as separate functions, so we need not // traverse them in the parent context. @@ -145,16 +185,66 @@ return true; } - bool VisitStmt(const Stmt *S) { - auto Type = getHashType(S); - if (Type == PGOHash::None) - return true; + /// If \p S gets a fresh counter, update the counter mappings. Return the + /// V1 hash of \p S. + PGOHash::HashType updateCounterMappings(Stmt *S) { + auto Type = getHashType(PGO_HASH_V1, S); + if (Type != PGOHash::None) + CounterMap[S] = NextCounter++; + return Type; + } - CounterMap[S] = NextCounter++; - Hash.combine(Type); + /// Include \p S in the function hash. + bool VisitStmt(Stmt *S) { + auto Type = updateCounterMappings(S); + if (Hash.getHashVersion() != PGO_HASH_V1) + Type = getHashType(Hash.getHashVersion(), S); + if (Type != PGOHash::None) + Hash.combine(Type); return true; } - PGOHash::HashType getHashType(const Stmt *S) { + + bool TraverseIfStmt(IfStmt *If) { + // If we used the V1 hash, use the default traversal. + if (Hash.getHashVersion() == PGO_HASH_V1) + return Base::TraverseIfStmt(If); + + // Otherwise, keep track of which branch we're in while traversing. + VisitStmt(If); + for (Stmt *CS : If->children()) { + if (!CS) + continue; + if (CS == If->getThen()) + Hash.combine(PGOHash::IfThenBranch); + else if (CS == If->getElse()) + Hash.combine(PGOHash::IfElseBranch); + TraverseStmt(CS); + } + Hash.combine(PGOHash::EndOfScope); + return true; + } + +// If the statement type \p N is nestable, and its nesting impacts profile +// stability, define a custom traversal which tracks the end of the statement +// in the hash (provided we're not using the V1 hash). +#define DEFINE_NESTABLE_TRAVERSAL(N) \ + bool Traverse##N(N *S) { \ + Base::Traverse##N(S); \ + if (Hash.getHashVersion() != PGO_HASH_V1) \ + Hash.combine(PGOHash::EndOfScope); \ + return true; \ + } + + DEFINE_NESTABLE_TRAVERSAL(WhileStmt) + DEFINE_NESTABLE_TRAVERSAL(DoStmt) + DEFINE_NESTABLE_TRAVERSAL(ForStmt) + DEFINE_NESTABLE_TRAVERSAL(CXXForRangeStmt) + DEFINE_NESTABLE_TRAVERSAL(ObjCForCollectionStmt) + DEFINE_NESTABLE_TRAVERSAL(CXXTryStmt) + DEFINE_NESTABLE_TRAVERSAL(CXXCatchStmt) + + /// Get version \p HashVersion of the PGO hash for \p S. + PGOHash::HashType getHashType(PGOHashVersion HashVersion, const Stmt *S) { switch (S->getStmtClass()) { default: break; @@ -192,9 +282,53 @@ return PGOHash::BinaryOperatorLAnd; if (BO->getOpcode() == BO_LOr) return PGOHash::BinaryOperatorLOr; + if (HashVersion == PGO_HASH_V2) { + switch (BO->getOpcode()) { + default: + break; + case BO_LT: + return PGOHash::BinaryOperatorLT; + case BO_GT: + return PGOHash::BinaryOperatorGT; + case BO_LE: + return PGOHash::BinaryOperatorLE; + case BO_GE: + return PGOHash::BinaryOperatorGE; + case BO_EQ: + return PGOHash::BinaryOperatorEQ; + case BO_NE: + return PGOHash::BinaryOperatorNE; + } + } break; } } + + if (HashVersion == PGO_HASH_V2) { + switch (S->getStmtClass()) { + default: + break; + case Stmt::GotoStmtClass: + return PGOHash::GotoStmt; + case Stmt::IndirectGotoStmtClass: + return PGOHash::IndirectGotoStmt; + case Stmt::BreakStmtClass: + return PGOHash::BreakStmt; + case Stmt::ContinueStmtClass: + return PGOHash::ContinueStmt; + case Stmt::ReturnStmtClass: + return PGOHash::ReturnStmt; + case Stmt::CXXThrowExprClass: + return PGOHash::ThrowExpr; + case Stmt::UnaryOperatorClass: { + const UnaryOperator *UO = cast(S); + if (UO->getOpcode() == UO_LNot) + return PGOHash::UnaryOperatorLNot; + break; + } + } + } + return PGOHash::None; } }; @@ -653,8 +787,14 @@ } void CodeGenPGO::mapRegionCounters(const Decl *D) { + // Use the latest hash version when inserting instrumentation, but use the + // version in the indexed profile if we're reading PGO data. + PGOHashVersion HashVersion = PGO_HASH_LATEST; + if (auto *PGOReader = CGM.getPGOReader()) + HashVersion = getPGOHashVersion(PGOReader, CGM); + RegionCounterMap.reset(new llvm::DenseMap); - MapRegionCounters Walker(*RegionCounterMap); + MapRegionCounters Walker(HashVersion, *RegionCounterMap); if (const FunctionDecl *FD = dyn_cast_or_null(D)) Walker.TraverseDecl(const_cast(FD)); else if (const ObjCMethodDecl *MD = dyn_cast_or_null(D)) Index: test/Frontend/Inputs/optimization-remark-with-hotness.proftext =================================================================== --- test/Frontend/Inputs/optimization-remark-with-hotness.proftext +++ test/Frontend/Inputs/optimization-remark-with-hotness.proftext @@ -1,6 +1,6 @@ foo # Func Hash: -0 +24 # Num Counters: 1 # Counter Values: @@ -16,7 +16,7 @@ main # Func Hash: -4 +17496 # Num Counters: 2 # Counter Values: Index: test/Profile/Inputs/c-captured.proftext =================================================================== --- test/Profile/Inputs/c-captured.proftext +++ test/Profile/Inputs/c-captured.proftext @@ -1,23 +1,23 @@ c-captured.c:__captured_stmt -10 +42129 2 1 1 c-captured.c:__captured_stmt.1 -266 +4752450705 3 1 10 1 main -0 +24 1 1 debug_captured -650 +11043906705 3 1 1 Index: test/Profile/Inputs/c-counter-overflows.proftext =================================================================== --- test/Profile/Inputs/c-counter-overflows.proftext +++ test/Profile/Inputs/c-counter-overflows.proftext @@ -1,5 +1,5 @@ main -285734896137 +10111551811706059223 8 1 68719476720 Index: test/Profile/Inputs/c-general.proftext =================================================================== --- test/Profile/Inputs/c-general.proftext +++ test/Profile/Inputs/c-general.proftext @@ -1,5 +1,5 @@ simple_loops -16515 +1245818015463121 4 1 100 @@ -7,7 +7,7 @@ 75 conditionals -74917022372782735 +4190663230902537370 11 1 100 @@ -22,7 +22,7 @@ 100 early_exits -44128811889290 +8265526549255474475 9 1 0 @@ -35,7 +35,7 @@ 0 jumps -2016037664281362839 +15872630527555456493 22 1 1 @@ -61,7 +61,7 @@ 9 switches -2745195701975551402 +11892326508727782373 19 1 1 @@ -84,7 +84,7 @@ 0 big_switch -10218718452081869619 +16933280399284440835 17 1 32 @@ -105,7 +105,7 @@ 2 boolean_operators -291222909838 +1245693242827665 8 1 100 @@ -117,7 +117,7 @@ 50 boolop_loops -9760565944591 +11270260636676715317 9 1 50 @@ -130,14 +130,14 @@ 26 conditional_operator -848 +54992 3 1 0 1 do_fallthrough -16586 +6898770640283947069 4 1 10 @@ -145,12 +145,12 @@ 8 main -0 +24 1 1 c-general.c:static_func -4 +18129 2 1 10 Index: test/Profile/Inputs/c-unprofiled-blocks.proftext =================================================================== --- test/Profile/Inputs/c-unprofiled-blocks.proftext +++ test/Profile/Inputs/c-unprofiled-blocks.proftext @@ -1,5 +1,5 @@ never_called -44257542701577 +5644096560937528444 9 0 0 @@ -12,12 +12,12 @@ 0 main -1 +24 1 1 dead_code -2859007309808137 +9636018207904213947 10 1 0 Index: test/Profile/Inputs/cxx-class.proftext =================================================================== --- test/Profile/Inputs/cxx-class.proftext +++ test/Profile/Inputs/cxx-class.proftext @@ -1,52 +1,52 @@ _Z14simple_wrapperv -4 +18129 2 1 100 main -0 +24 1 1 _ZN6SimpleD1Ev -10 +42129 2 0 0 _ZN6SimpleD2Ev -10 +42129 2 100 99 _ZN6Simple6methodEv -10 +42129 2 100 99 _ZN6SimpleC1Ei -10 +42129 2 0 0 _ZN6SimpleC2Ei -10 +42129 2 100 99 _ZN7DerivedC1Ev -10 +42129 2 100 99 _ZN7DerivedD2Ev -10 +42129 2 100 99 Index: test/Profile/Inputs/cxx-hash-v2.proftext =================================================================== --- /dev/null +++ test/Profile/Inputs/cxx-hash-v2.proftext @@ -0,0 +1,239 @@ +_Z11loop_returnv +# Func Hash: +1160721 +# Num Counters: +2 +# Counter Values: +0 +0 + +_Z10loop_breakv +# Func Hash: +1160593 +# Num Counters: +2 +# Counter Values: +0 +0 + +_Z8no_gotosv +# Func Hash: +1 +# Num Counters: +2 +# Counter Values: +0 +0 + +_Z13loop_continuev +# Func Hash: +1160657 +# Num Counters: +2 +# Counter Values: +0 +0 + +_Z18loop_after_if_elsev +# Func Hash: +11044458641 +# Num Counters: +3 +# Counter Values: +0 +0 +0 + +_Z21consecutive_try_catchv +# Func Hash: +49221687100497 +# Num Counters: +5 +# Counter Values: +0 +0 +0 +0 +0 + +_Z16nested_try_catchv +# Func Hash: +49147600487505 +# Num Counters: +5 +# Counter Values: +0 +0 +0 +0 +0 + +_Z13indirect_gotov +# Func Hash: +1345 +# Num Counters: +2 +# Counter Values: +0 +0 + +_Z17nested_for_rangesv +# Func Hash: +1332305 +# Num Counters: +3 +# Counter Values: +0 +0 +0 + +_Z18loop_in_then_blockv +# Func Hash: +11040003281 +# Num Counters: +3 +# Counter Values: +0 +0 +0 + +_Z22consecutive_for_rangesv +# Func Hash: +1380689 +# Num Counters: +3 +# Counter Values: +0 +0 +0 + +_Z15consecutive_dosv +# Func Hash: +856273 +# Num Counters: +3 +# Counter Values: +0 +0 +0 + +_Z11direct_gotov +# Func Hash: +1281 +# Num Counters: +2 +# Counter Values: +0 +0 + +main +# Func Hash: +24 +# Num Counters: +1 +# Counter Values: +1 + +_Z11double_lnotv +# Func Hash: +174695569 +# Num Counters: +2 +# Counter Values: +0 +0 + +_Z18if_inside_of_whilev +# Func Hash: +36250705 +# Num Counters: +3 +# Counter Values: +0 +0 +0 + +_Z11single_lnotv +# Func Hash: +2729105 +# Num Counters: +2 +# Counter Values: +0 +0 + +_Z19if_outside_of_whilev +# Func Hash: +38053009 +# Num Counters: +3 +# Counter Values: +0 +0 +0 + +_Z8no_throwv +# Func Hash: +0 +# Num Counters: +1 +# Counter Values: +0 + +_Z9has_throwv +# Func Hash: +25 +# Num Counters: +1 +# Counter Values: +0 + +_Z10nested_dosv +# Func Hash: +799825 +# Num Counters: +3 +# Counter Values: +0 +0 +0 + +_Z16if_inside_of_forv +# Func Hash: +4750648401 +# Num Counters: +3 +# Counter Values: +0 +0 +0 + +_Z18loop_in_else_blockv +# Func Hash: +11044398161 +# Num Counters: +3 +# Counter Values: +0 +0 +0 + +_Z17if_outside_of_forv +# Func Hash: +4752450705 +# Num Counters: +3 +# Counter Values: +0 +0 +0 + +_Z10loop_emptyv +# Func Hash: +18129 +# Num Counters: +2 +# Counter Values: +0 +0 + Index: test/Profile/Inputs/cxx-lambda.proftext =================================================================== --- test/Profile/Inputs/cxx-lambda.proftext +++ test/Profile/Inputs/cxx-lambda.proftext @@ -1,17 +1,17 @@ cxx-lambda.cpp:_ZZ7lambdasvENK3$_0clEi -654 +11211970062 3 10 9 9 main -0 +24 1 1 _Z7lambdasv -41226 +2895087587861649 4 1 1 Index: test/Profile/Inputs/cxx-rangefor.proftext =================================================================== --- test/Profile/Inputs/cxx-rangefor.proftext +++ test/Profile/Inputs/cxx-rangefor.proftext @@ -1,5 +1,5 @@ _Z9range_forv -0x000000000014a28a +6169071350249721981 5 1 4 @@ -8,6 +8,6 @@ 1 main -0 +24 1 1 Index: test/Profile/Inputs/cxx-templates.proftext =================================================================== --- test/Profile/Inputs/cxx-templates.proftext +++ test/Profile/Inputs/cxx-templates.proftext @@ -1,16 +1,16 @@ main -0 +24 1 1 _Z4loopILj0EEvv -4 +18129 2 1 0 _Z4loopILj100EEvv -4 +18129 2 1 100 Index: test/Profile/Inputs/cxx-throws.proftext =================================================================== --- test/Profile/Inputs/cxx-throws.proftext +++ test/Profile/Inputs/cxx-throws.proftext @@ -1,5 +1,5 @@ _Z6throwsv -18359008150154 +340120998528097520 9 1 100 @@ -12,14 +12,14 @@ 100 _Z11unreachablei -0x28a +706946049169 3 1 1 0 main -0x2cc +187765848 3 1 1 Index: test/Profile/Inputs/func-entry.proftext =================================================================== --- test/Profile/Inputs/func-entry.proftext +++ test/Profile/Inputs/func-entry.proftext @@ -1,10 +1,10 @@ foo -0 +24 1 1000 main -4 +1160280 2 1 10000 Index: test/Profile/Inputs/gcc-flag-compatibility.proftext =================================================================== --- test/Profile/Inputs/gcc-flag-compatibility.proftext +++ test/Profile/Inputs/gcc-flag-compatibility.proftext @@ -1,5 +1,5 @@ main -4 +1160280 2 1 100 Index: test/Profile/Inputs/objc-general.proftext =================================================================== --- test/Profile/Inputs/objc-general.proftext +++ test/Profile/Inputs/objc-general.proftext @@ -1,17 +1,30 @@ objc-general.m:__13+[A foreach:]_block_invoke -10 +42129 2 2 1 objc-general.m:+[A foreach:] -6 +401 2 1 2 main -0 +24 1 1 +consecutive_objc_for_ranges +1642897 +3 +0 +0 +0 + +nested_objc_for_ranges +1598545 +3 +0 +0 +0 Index: test/Profile/c-outdated-data.c =================================================================== --- test/Profile/c-outdated-data.c +++ test/Profile/c-outdated-data.c @@ -7,10 +7,10 @@ // RUN: %clang_cc1 -triple x86_64-apple-macosx10.9 -main-file-name c-outdated-data.c %s -o /dev/null -emit-llvm -fprofile-instrument-use-path=%t.profdata 2>&1 | FileCheck %s -check-prefix=NO_MISSING // RUN: %clang_cc1 -triple x86_64-apple-macosx10.9 -main-file-name c-outdated-data.c %s -o /dev/null -emit-llvm -Wprofile-instr-missing -fprofile-instrument-use-path=%t.profdata 2>&1 | FileCheck %s -check-prefix=WITH_MISSING -// NO_MISSING: warning: profile data may be out of date: of 3 functions, 1 has mismatched data that will be ignored +// NO_MISSING: warning: profile data may be out of date: of 3 functions, 2 have mismatched data that will be ignored // NO_MISSING-NOT: 1 has no data -// WITH_MISSING: warning: profile data may be out of date: of 3 functions, 1 has mismatched data that will be ignored +// WITH_MISSING: warning: profile data may be out of date: of 3 functions, 2 have mismatched data that will be ignored // WITH_MISSING: warning: profile data may be incomplete: of 3 functions, 1 has no data void no_usable_data() { Index: test/Profile/cxx-hash-v2.cpp =================================================================== --- /dev/null +++ test/Profile/cxx-hash-v2.cpp @@ -0,0 +1,177 @@ +// REQUIRES: shell + +// Check that all of the hashes in this file are unique (i.e, that none of the +// profiles for these functions are mutually interchangeable). +// +// RUN: llvm-profdata show -all-functions %S/Inputs/cxx-hash-v2.profdata.v5 | grep "Hash: 0x" | sort > %t.hashes +// RUN: uniq %t.hashes > %t.hashes.unique +// RUN: diff %t.hashes %t.hashes.unique + +// RUN: llvm-profdata merge %S/Inputs/cxx-hash-v2.proftext -o %t.profdata +// RUN: %clang_cc1 -std=c++11 -fexceptions -fcxx-exceptions -triple x86_64-apple-macosx10.9 -main-file-name cxx-hash-v2.mm %s -o /dev/null -emit-llvm -fprofile-instrument-use-path=%t.profdata 2>&1 | FileCheck %s -allow-empty +// RUN: %clang_cc1 -std=c++11 -fexceptions -fcxx-exceptions -triple x86_64-apple-macosx10.9 -main-file-name cxx-hash-v2.mm %s -o /dev/null -emit-llvm -fprofile-instrument-use-path=%S/Inputs/cxx-hash-v2.profdata.v5 2>&1 | FileCheck %s -allow-empty + +// CHECK-NOT: warning: profile data may be out of date + +int x; +int arr[1] = {0}; + +void loop_after_if_else() { + if (1) + x = 1; + else + x = 2; + while (0) + ++x; +} + +void loop_in_then_block() { + if (1) { + while (0) + ++x; + } else { + x = 2; + } +} + +void loop_in_else_block() { + if (1) { + x = 1; + } else { + while (0) + ++x; + } +} + +void if_inside_of_for() { + for (x = 0; x < 0; ++x) { + x = 1; + if (1) + x = 2; + } +} + +void if_outside_of_for() { + for (x = 0; x < 0; ++x) + x = 1; + if (1) + x = 2; +} + +void if_inside_of_while() { + while (0) { + x = 1; + if (1) + x = 2; + } +} + +void if_outside_of_while() { + while (0) + x = 1; + if (1) + x = 2; +} + +void nested_dos() { + do { + do { + ++x; + } while (0); + } while (0); +} + +void consecutive_dos() { + do { + } while (0); + do { + ++x; + } while (0); +} + +void loop_empty() { + for (x = 0; x < 5; ++x) {} +} + +void loop_return() { + for (x = 0; x < 5; ++x) + return; +} + +void loop_continue() { + for (x = 0; x < 5; ++x) + continue; +} + +void loop_break() { + for (x = 0; x < 5; ++x) + break; +} + +void no_gotos() { + static void *dispatch[] = {&&done}; + x = 0; +done: + ++x; +} + +void direct_goto() { + static void *dispatch[] = {&&done}; + x = 0; + goto done; +done: + ++x; +} + +void indirect_goto() { + static void *dispatch[] = {&&done}; + x = 0; + goto *dispatch[x]; +done: + ++x; +} + +void nested_for_ranges() { + for (int a : arr) + for (int b : arr) + ++x; +} + +void consecutive_for_ranges() { + for (int a : arr) {} + for (int b : arr) + ++x; +} + +void nested_try_catch() { + try { + try { + ++x; + } catch (...) {} + } catch (...) {} +} + +void consecutive_try_catch() { + try {} catch (...) {} + try { + ++x; + } catch (...) {} +} + +void no_throw() {} + +void has_throw() { + throw 0; +} + +void single_lnot() { + if (!x) {} +} + +void double_lnot() { + if (!!x) {} +} + +int main() { + return 0; +} Index: test/Profile/objc-general.m =================================================================== --- test/Profile/objc-general.m +++ test/Profile/objc-general.m @@ -3,7 +3,9 @@ // RUN: %clang_cc1 -triple x86_64-apple-macosx10.9 -main-file-name objc-general.m %s -o - -emit-llvm -fblocks -fprofile-instrument=clang | FileCheck -check-prefix=PGOGEN %s // RUN: llvm-profdata merge %S/Inputs/objc-general.proftext -o %t.profdata -// RUN: %clang_cc1 -triple x86_64-apple-macosx10.9 -main-file-name objc-general.m %s -o - -emit-llvm -fblocks -fprofile-instrument-use-path=%t.profdata | FileCheck -check-prefix=PGOUSE %s +// RUN: %clang_cc1 -triple x86_64-apple-macosx10.9 -main-file-name objc-general.m %s -o - -emit-llvm -fblocks -fprofile-instrument-use-path=%t.profdata 2>&1 | FileCheck -check-prefix=PGOUSE %s + +// PGOUSE-NOT: warning: profile data may be out of date #ifdef HAVE_FOUNDATION @@ -63,6 +65,20 @@ } @end +void nested_objc_for_ranges(NSArray *arr) { + int x = 0; + for (id a in arr) + for (id b in arr) + ++x; +} + +void consecutive_objc_for_ranges(NSArray *arr) { + int x = 0; + for (id a in arr) {} + for (id b in arr) + ++x; +} + // PGOUSE-DAG: ![[FR1]] = !{!"branch_weights", i32 2, i32 3} // PGOUSE-DAG: ![[FR2]] = !{!"branch_weights", i32 3, i32 2} // PGOUSE-DAG: ![[BL1]] = !{!"branch_weights", i32 2, i32 2}