Index: clang/include/clang/Basic/DiagnosticFrontendKinds.td =================================================================== --- clang/include/clang/Basic/DiagnosticFrontendKinds.td +++ clang/include/clang/Basic/DiagnosticFrontendKinds.td @@ -275,6 +275,16 @@ def warn_profile_data_unprofiled : Warning< "no profile data available for file \"%0\"">, InGroup; +def warn_profile_data_misexpect : Warning< + "Potential performance regression from use of __builtin_expect(): " + "Annotation was correct on %0 of profiled executions.">, + InGroup, + DefaultIgnore; +def remark_profile_data_misexpect : Remark< + "Potential performance regression from use of __builtin_expect(): " + "Annotation was correct on %0 of profiled executions.">, + BackendInfo, + InGroup; } // end of instrumentation issue category Index: clang/include/clang/Basic/DiagnosticGroups.td =================================================================== --- clang/include/clang/Basic/DiagnosticGroups.td +++ clang/include/clang/Basic/DiagnosticGroups.td @@ -1031,6 +1031,7 @@ def ProfileInstrMissing : DiagGroup<"profile-instr-missing">; def ProfileInstrOutOfDate : DiagGroup<"profile-instr-out-of-date">; def ProfileInstrUnprofiled : DiagGroup<"profile-instr-unprofiled">; +def MisExpect : DiagGroup<"misexpect">; // AddressSanitizer frontend instrumentation remarks. def SanitizeAddressRemarks : DiagGroup<"sanitize-address">; Index: clang/lib/CodeGen/CodeGenAction.cpp =================================================================== --- clang/lib/CodeGen/CodeGenAction.cpp +++ clang/lib/CodeGen/CodeGenAction.cpp @@ -14,6 +14,7 @@ #include "clang/AST/ASTContext.h" #include "clang/AST/DeclCXX.h" #include "clang/AST/DeclGroup.h" +#include "clang/Basic/DiagnosticFrontend.h" #include "clang/Basic/FileManager.h" #include "clang/Basic/LangStandard.h" #include "clang/Basic/SourceManager.h" @@ -363,6 +364,7 @@ bool StackSizeDiagHandler(const llvm::DiagnosticInfoStackSize &D); /// Specialized handler for unsupported backend feature diagnostic. void UnsupportedDiagHandler(const llvm::DiagnosticInfoUnsupported &D); + void MisExpectDiagHandler(const llvm::DiagnosticInfoMisExpect &D); /// Specialized handlers for optimization remarks. /// Note that these handlers only accept remarks and they always handle /// them. @@ -615,6 +617,26 @@ << Filename << Line << Column; } +void BackendConsumer::MisExpectDiagHandler( + const llvm::DiagnosticInfoMisExpect &D) { + StringRef Filename; + unsigned Line, Column; + bool BadDebugInfo = false; + FullSourceLoc Loc = + getBestLocationFromDebugLoc(D, BadDebugInfo, Filename, Line, Column); + + Diags.Report(Loc, diag::warn_profile_data_misexpect) << D.getMsg().str(); + Diags.Report(Loc, diag::remark_profile_data_misexpect) << D.getMsg().str(); + + if (BadDebugInfo) + // If we were not able to translate the file:line:col information + // back to a SourceLocation, at least emit a note stating that + // we could not translate this location. This can happen in the + // case of #line directives. + Diags.Report(Loc, diag::note_fe_backend_invalid_loc) + << Filename << Line << Column; +} + void BackendConsumer::EmitOptimizationMessage( const llvm::DiagnosticInfoOptimizationBase &D, unsigned DiagID) { // We only support warnings and remarks. @@ -785,6 +807,9 @@ case llvm::DK_Unsupported: UnsupportedDiagHandler(cast(DI)); return; + case llvm::DK_MisExpect: + MisExpectDiagHandler(cast(DI)); + return; default: // Plugin IDs are not bound to any value as they are set dynamically. ComputeDiagRemarkID(Severity, backend_plugin, DiagID); Index: clang/lib/CodeGen/CodeGenFunction.cpp =================================================================== --- clang/lib/CodeGen/CodeGenFunction.cpp +++ clang/lib/CodeGen/CodeGenFunction.cpp @@ -33,6 +33,7 @@ #include "clang/Frontend/FrontendDiagnostic.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Dominators.h" +#include "llvm/IR/Instructions.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/MDBuilder.h" #include "llvm/IR/Operator.h" @@ -1360,8 +1361,6 @@ return true; } - - /// EmitBranchOnBoolExpr - Emit a branch on a boolean condition (e.g. for an if /// statement) to the specified blocks. Based on the condition, this might try /// to simplify the codegen of the conditional based on the branch. Index: clang/test/Profile/Inputs/misexpect-branch-nonconst-expect-arg.proftext =================================================================== --- /dev/null +++ clang/test/Profile/Inputs/misexpect-branch-nonconst-expect-arg.proftext @@ -0,0 +1,9 @@ +bar +# Func Hash: +11262309464 +# Num Counters: +2 +# Counter Values: +200000 +2 + Index: clang/test/Profile/Inputs/misexpect-branch.proftext =================================================================== --- /dev/null +++ clang/test/Profile/Inputs/misexpect-branch.proftext @@ -0,0 +1,9 @@ +bar +# Func Hash: +45795613684824 +# Num Counters: +2 +# Counter Values: +200000 +0 + Index: clang/test/Profile/Inputs/misexpect-switch-default-only.proftext =================================================================== --- /dev/null +++ clang/test/Profile/Inputs/misexpect-switch-default-only.proftext @@ -0,0 +1,12 @@ +main +# Func Hash: +79676873694057560 +# Num Counters: +5 +# Counter Values: +1 +20 +20000 +20000 +20000 + Index: clang/test/Profile/Inputs/misexpect-switch-default.proftext =================================================================== --- /dev/null +++ clang/test/Profile/Inputs/misexpect-switch-default.proftext @@ -0,0 +1,17 @@ +main +# Func Hash: +8712453512413296413 +# Num Counters: +9 +# Counter Values: +1 +20000 +20000 +4066 +11889 +0 +0 +4045 +0 + + Index: clang/test/Profile/Inputs/misexpect-switch.proftext =================================================================== --- /dev/null +++ clang/test/Profile/Inputs/misexpect-switch.proftext @@ -0,0 +1,16 @@ +main +# Func Hash: +1965403898329309329 +# Num Counters: +9 +# Counter Values: +1 +20 +20000 +20000 +12 +26 +0 +0 +19962 + Index: clang/test/Profile/misexpect-branch-cold.c =================================================================== --- /dev/null +++ clang/test/Profile/misexpect-branch-cold.c @@ -0,0 +1,27 @@ +// Test that misexpect emits no warning when prediction is correct + +// RUN: llvm-profdata merge %S/Inputs/misexpect-branch.proftext -o %t.profdata +// RUN: %clang_cc1 %s -O2 -o - -disable-llvm-passes -emit-llvm -fprofile-instrument-use-path=%t.profdata -verify -Wmisexpect + +// expected-no-diagnostics +#define likely(x) __builtin_expect(!!(x), 1) +#define unlikely(x) __builtin_expect(!!(x), 0) + +int foo(int); +int baz(int); +int buzz(); + +const int inner_loop = 100; +const int outer_loop = 2000; + +int bar() { + int rando = buzz(); + int x = 0; + if (unlikely(rando % (outer_loop * inner_loop) == 0)) { + x = baz(rando); + } else { + x = foo(50); + } + return x; +} + Index: clang/test/Profile/misexpect-branch-nonconst-expected-val.c =================================================================== --- /dev/null +++ clang/test/Profile/misexpect-branch-nonconst-expected-val.c @@ -0,0 +1,26 @@ +// Test that misexpect emits no warning when condition is not a compile-time constant + +// RUN: llvm-profdata merge %S/Inputs/misexpect-branch-nonconst-expect-arg.proftext -o %t.profdata +// RUN: %clang_cc1 %s -O2 -o - -disable-llvm-passes -emit-llvm -fprofile-instrument-use-path=%t.profdata -verify -Wmisexpect + +// expected-no-diagnostics +#define likely(x) __builtin_expect(!!(x), 1) +#define unlikely(x) __builtin_expect(!!(x), 0) + +int foo(int); +int baz(int); +int buzz(); + +const int inner_loop = 100; +const int outer_loop = 2000; + +int bar() { + int rando = buzz(); + int x = 0; + if (__builtin_expect(rando % (outer_loop * inner_loop) == 0, buzz())) { + x = baz(rando); + } else { + x = foo(50); + } + return x; +} Index: clang/test/Profile/misexpect-branch.c =================================================================== --- /dev/null +++ clang/test/Profile/misexpect-branch.c @@ -0,0 +1,26 @@ +// Test that misexpect detects mis-annotated branches + +// RUN: llvm-profdata merge %S/Inputs/misexpect-branch.proftext -o %t.profdata +// RUN: %clang_cc1 %s -O2 -o - -emit-llvm -fprofile-instrument-use-path=%t.profdata -verify -Wmisexpect -debug-info-kind=line-tables-only + +#define likely(x) __builtin_expect(!!(x), 1) +#define unlikely(x) __builtin_expect(!!(x), 0) + +int foo(int); +int baz(int); +int buzz(); + +const int inner_loop = 100; +const int outer_loop = 2000; + +int bar() { + int rando = buzz(); + int x = 0; + if (likely(rando % (outer_loop * inner_loop) == 0)) { // expected-warning-re {{Potential performance regression from use of __builtin_expect(): Annotation was correct on {{.+}}% of profiled executions.}} + x = baz(rando); + } else { + x = foo(50); + } + return x; +} + Index: clang/test/Profile/misexpect-no-warning-without-flag.c =================================================================== --- /dev/null +++ clang/test/Profile/misexpect-no-warning-without-flag.c @@ -0,0 +1,27 @@ +// Test that misexpect emits no warning without -fmisexpect flag + +// RUN: llvm-profdata merge %S/Inputs/misexpect-branch.proftext -o %t.profdata +// RUN: %clang_cc1 %s -O2 -o - -disable-llvm-passes -emit-llvm -fprofile-instrument-use-path=%t.profdata -verify + +// expected-no-diagnostics +#define likely(x) __builtin_expect(!!(x), 1) +#define unlikely(x) __builtin_expect(!!(x), 0) + +int foo(int); +int baz(int); +int buzz(); + +const int inner_loop = 100; +const int outer_loop = 2000; + +int bar() { + int rando = buzz(); + int x = 0; + if (likely(rando % (outer_loop * inner_loop) == 0)) { + x = baz(rando); + } else { + x = foo(50); + } + return x; +} + Index: clang/test/Profile/misexpect-switch-default.c =================================================================== --- /dev/null +++ clang/test/Profile/misexpect-switch-default.c @@ -0,0 +1,40 @@ +// Test that misexpect detects mis-annotated switch statements for default case + +// RUN: llvm-profdata merge %S/Inputs/misexpect-switch-default.proftext -o %t.profdata +// RUN: %clang_cc1 %s -O2 -o - -emit-llvm -fprofile-instrument-use-path=%t.profdata -verify -Wmisexpect -debug-info-kind=line-tables-only + +int sum(int *buff, int size); +int random_sample(int *buff, int size); +int rand(); +void init_arry(); + +const int inner_loop = 1000; +const int outer_loop = 20; +const int arry_size = 25; + +int arry[arry_size] = {0}; + +int main() { + init_arry(); + int val = 0; + int j; + for (j = 0; j < outer_loop * inner_loop; ++j) { + unsigned condition = rand() % 5; + switch (__builtin_expect(condition, 6)) { // expected-warning-re {{Potential performance regression from use of __builtin_expect(): Annotation was correct on {{.+}}% of profiled executions.}} + case 0: + val += sum(arry, arry_size); + break; + case 1: + case 2: + case 3: + break; + case 4: + val += random_sample(arry, arry_size); + break; + default: + __builtin_unreachable(); + } // end switch + } // end outer_loop + + return 0; +} Index: clang/test/Profile/misexpect-switch-nonconst.c =================================================================== --- /dev/null +++ clang/test/Profile/misexpect-switch-nonconst.c @@ -0,0 +1,43 @@ +// Test that misexpect emits no warning when switch condition is non-const + +// RUN: llvm-profdata merge %S/Inputs/misexpect-switch.proftext -o %t.profdata +// RUN: %clang_cc1 %s -O2 -o - -disable-llvm-passes -emit-llvm -fprofile-instrument-use-path=%t.profdata -verify -Wmisexpect + +// expected-no-diagnostics +int sum(int *buff, int size); +int random_sample(int *buff, int size); +int rand(); +void init_arry(); + +const int inner_loop = 1000; +const int outer_loop = 20; +const int arry_size = 25; + +int arry[arry_size] = {0}; + +int main() { + init_arry(); + int val = 0; + + int j, k; + for (j = 0; j < outer_loop; ++j) { + for (k = 0; k < inner_loop; ++k) { + unsigned condition = rand() % 10000; + switch (__builtin_expect(condition, rand())) { + case 0: + val += sum(arry, arry_size); + break; + case 1: + case 2: + case 3: + case 4: + val += random_sample(arry, arry_size); + break; + default: + __builtin_unreachable(); + } // end switch + } // end inner_loop + } // end outer_loop + + return 0; +} Index: clang/test/Profile/misexpect-switch-only-default-case.c =================================================================== --- /dev/null +++ clang/test/Profile/misexpect-switch-only-default-case.c @@ -0,0 +1,35 @@ +// Test that misexpect emits no warning when there is only one switch case + +// RUN: llvm-profdata merge %S/Inputs/misexpect-switch-default-only.proftext -o %t.profdata +// RUN: %clang_cc1 %s -O2 -o - -emit-llvm -fprofile-instrument-use-path=%t.profdata -verify -Wmisexpect -debug-info-kind=line-tables-only + +// expected-no-diagnostics +int sum(int *buff, int size); +int random_sample(int *buff, int size); +int rand(); +void init_arry(); + +const int inner_loop = 1000; +const int outer_loop = 20; +const int arry_size = 25; + +int arry[arry_size] = {0}; + +int main() { + init_arry(); + int val = 0; + + int j, k; + for (j = 0; j < outer_loop; ++j) { + for (k = 0; k < inner_loop; ++k) { + unsigned condition = rand() % 10000; + switch (__builtin_expect(condition, 0)) { + default: + val += random_sample(arry, arry_size); + break; + }; // end switch + } // end inner_loop + } // end outer_loop + + return 0; +} Index: clang/test/Profile/misexpect-switch.c =================================================================== --- /dev/null +++ clang/test/Profile/misexpect-switch.c @@ -0,0 +1,41 @@ +// Test that misexpect detects mis-annotated switch statements + +// RUN: llvm-profdata merge %S/Inputs/misexpect-switch.proftext -o %t.profdata +// RUN: %clang_cc1 %s -O2 -o - -emit-llvm -fprofile-instrument-use-path=%t.profdata -verify -Wmisexpect -debug-info-kind=line-tables-only + +int sum(int *buff, int size); +int random_sample(int *buff, int size); +int rand(); +void init_arry(); + +const int inner_loop = 1000; +const int outer_loop = 20; +const int arry_size = 25; + +int arry[arry_size] = {0}; + +int main() { + init_arry(); + int val = 0; + + int j, k; + for (j = 0; j < outer_loop; ++j) { + for (k = 0; k < inner_loop; ++k) { + unsigned condition = rand() % 10000; + switch (__builtin_expect(condition, 0)) { // expected-warning-re {{Potential performance regression from use of __builtin_expect(): Annotation was correct on {{.+}}% of profiled executions.}} + case 0: + val += sum(arry, arry_size); + break; + case 1: + case 2: + case 3: + break; + default: + val += random_sample(arry, arry_size); + break; + } // end switch + } // end inner_loop + } // end outer_loop + + return 0; +} Index: llvm/include/llvm/IR/DiagnosticInfo.h =================================================================== --- llvm/include/llvm/IR/DiagnosticInfo.h +++ llvm/include/llvm/IR/DiagnosticInfo.h @@ -75,7 +75,8 @@ DK_MIRParser, DK_PGOProfile, DK_Unsupported, - DK_FirstPluginKind + DK_FirstPluginKind, + DK_MisExpect }; /// Get the next available kind ID for a plugin diagnostic. @@ -1002,6 +1003,31 @@ void print(DiagnosticPrinter &DP) const override; }; +/// Diagnostic information for MisExpect analysis. +class DiagnosticInfoMisExpect : public DiagnosticInfoWithLocationBase { +public: + DiagnosticInfoMisExpect(const Function &Fn, const Twine &Msg, + const DiagnosticLocation &Loc = DiagnosticLocation(), + DiagnosticSeverity Severity = DS_Error) + : DiagnosticInfoWithLocationBase(DK_MisExpect, Severity, Fn, Loc), + Msg(Msg) {} + + DiagnosticInfoMisExpect(const Instruction *Inst, Twine &Msg); + + /// \see DiagnosticInfo::print. + void print(DiagnosticPrinter &DP) const override; + + static bool classof(const DiagnosticInfo *DI) { + return DI->getKind() == DK_MisExpect; + } + + const Twine &getMsg() const { return Msg; } + +private: + /// Message to report. + const Twine &Msg; +}; + } // end namespace llvm #endif // LLVM_IR_DIAGNOSTICINFO_H Index: llvm/include/llvm/IR/FixedMetadataKinds.def =================================================================== --- llvm/include/llvm/IR/FixedMetadataKinds.def +++ llvm/include/llvm/IR/FixedMetadataKinds.def @@ -39,3 +39,4 @@ LLVM_FIXED_MD_KIND(MD_access_group, "llvm.access.group", 25) LLVM_FIXED_MD_KIND(MD_callback, "callback", 26) LLVM_FIXED_MD_KIND(MD_preserve_access_index, "llvm.preserve.access.index", 27) +LLVM_FIXED_MD_KIND(MD_misexpect, "misexpect", 28) Index: llvm/include/llvm/IR/MDBuilder.h =================================================================== --- llvm/include/llvm/IR/MDBuilder.h +++ llvm/include/llvm/IR/MDBuilder.h @@ -16,6 +16,7 @@ #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/StringRef.h" +#include "llvm/IR/Constants.h" #include "llvm/IR/GlobalValue.h" #include "llvm/Support/DataTypes.h" #include @@ -75,6 +76,10 @@ /// Return metadata containing the section prefix for a function. MDNode *createFunctionSectionPrefix(StringRef Prefix); + /// return metadata containing expected value + MDNode *createMisExpect(uint64_t Index, uint64_t LikelyWeight, + uint64_t UnlikelyWeight); + //===------------------------------------------------------------------===// // Range metadata. //===------------------------------------------------------------------===// Index: llvm/include/llvm/Transforms/Utils/MisExpect.h =================================================================== --- /dev/null +++ llvm/include/llvm/Transforms/Utils/MisExpect.h @@ -0,0 +1,43 @@ +//===--- MisExpect.cpp - Check Use of __builtin_expect() with PGO data ----===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This contains code to emit warnings for potentially incorrect usage of +// __builtin_expect(). This utility extracts the threshold values from metadata +// associated with the instrumented Branch or Switch. The threshold values are +// then used to determin if a warning would be emmited. +// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/SmallVector.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/LLVMContext.h" + +namespace llvm { +namespace misexpect { + +/// verifyMisExpect - compares PGO counters to the thresholds used for +/// __builtin_expect and warns if the PGO counters are outside of the expected +/// range. +/// \param I the Instruction being checked +/// \param Weights A vector of profile weights for each target block +/// \param Ctx The current LLVM context +void verifyMisExpect(llvm::Instruction *I, + const llvm::SmallVector &Weights, + llvm::LLVMContext &Ctx); + +/// checkClangInstrumentation - verify if __builtin_expect matches PGO profile +/// This function checks the frontend instrumentation in the backend when +/// lowering __builtin_expect intrinsics. It checks for existing metadata, and +/// then validates the use of builtin-expect against the assigned branch +/// weights. +/// \param I the Instruction being checked +void checkClangInstrumentation(Instruction &I); + +} // namespace misexpect +} // namespace llvm Index: llvm/lib/IR/DiagnosticInfo.cpp =================================================================== --- llvm/lib/IR/DiagnosticInfo.cpp +++ llvm/lib/IR/DiagnosticInfo.cpp @@ -370,5 +370,16 @@ return OS.str(); } +DiagnosticInfoMisExpect::DiagnosticInfoMisExpect(const Instruction *Inst, + Twine &Msg) + : DiagnosticInfoWithLocationBase(DK_MisExpect, DS_Warning, + *Inst->getParent()->getParent(), + Inst->getDebugLoc()), + Msg(Msg) {} + +void DiagnosticInfoMisExpect::print(DiagnosticPrinter &DP) const { + DP << getLocationStr() << ": " << getMsg(); +} + void OptimizationRemarkAnalysisFPCommute::anchor() {} void OptimizationRemarkAnalysisAliasing::anchor() {} Index: llvm/lib/IR/MDBuilder.cpp =================================================================== --- llvm/lib/IR/MDBuilder.cpp +++ llvm/lib/IR/MDBuilder.cpp @@ -309,3 +309,15 @@ }; return MDNode::get(Context, Vals); } + +MDNode *MDBuilder::createMisExpect(uint64_t Index, uint64_t LikleyWeight, + uint64_t UnlikleyWeight) { + auto IntType = Type::getInt64Ty(Context); + Metadata *Vals[] = { + createString("misexpect"), + createConstant(ConstantInt::get(IntType, Index)), + createConstant(ConstantInt::get(IntType, LikleyWeight)), + createConstant(ConstantInt::get(IntType, UnlikleyWeight)), + }; + return MDNode::get(Context, Vals); +} Index: llvm/lib/Transforms/IPO/SampleProfile.cpp =================================================================== --- llvm/lib/Transforms/IPO/SampleProfile.cpp +++ llvm/lib/Transforms/IPO/SampleProfile.cpp @@ -72,6 +72,7 @@ #include "llvm/Transforms/Instrumentation.h" #include "llvm/Transforms/Utils/CallPromotionUtils.h" #include "llvm/Transforms/Utils/Cloning.h" +#include "llvm/Transforms/Utils/MisExpect.h" #include #include #include @@ -1447,6 +1448,8 @@ } } + misexpect::verifyMisExpect(TI, Weights, TI->getContext()); + uint64_t TempWeight; // Only set weights if there is at least one non-zero weight. // In any other case, let the analyzer set weights. Index: llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp =================================================================== --- llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp +++ llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp @@ -108,6 +108,7 @@ #include "llvm/Transforms/Instrumentation.h" #include "llvm/Transforms/Instrumentation/PGOInstrumentation.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/MisExpect.h" #include #include #include @@ -1776,6 +1777,9 @@ : Weights) { dbgs() << W << " "; } dbgs() << "\n";); + + misexpect::verifyMisExpect(TI, Weights, TI->getContext()); + TI->setMetadata(LLVMContext::MD_prof, MDB.createBranchWeights(Weights)); if (EmitBranchProbability) { std::string BrCondStr = getBranchCondString(TI); Index: llvm/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp =================================================================== --- llvm/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp +++ llvm/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp @@ -26,6 +26,7 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Utils/MisExpect.h" using namespace llvm; @@ -71,15 +72,20 @@ unsigned n = SI.getNumCases(); // +1 for default case. SmallVector Weights(n + 1, UnlikelyBranchWeight); - if (Case == *SI.case_default()) - Weights[0] = LikelyBranchWeight; - else - Weights[Case.getCaseIndex() + 1] = LikelyBranchWeight; + uint64_t Index = (Case == *SI.case_default()) ? 0 : Case.getCaseIndex() + 1; + Weights[Index] = LikelyBranchWeight; + + SI.setMetadata( + LLVMContext::MD_misexpect, + MDBuilder(CI->getContext()) + .createMisExpect(Index, LikelyBranchWeight, UnlikelyBranchWeight)); + + SI.setCondition(ArgValue); + misexpect::checkClangInstrumentation(SI); SI.setMetadata(LLVMContext::MD_prof, MDBuilder(CI->getContext()).createBranchWeights(Weights)); - SI.setCondition(ArgValue); return true; } @@ -280,19 +286,28 @@ MDBuilder MDB(CI->getContext()); MDNode *Node; + MDNode *ExpNode; if ((ExpectedValue->getZExtValue() == ValueComparedTo) == - (Predicate == CmpInst::ICMP_EQ)) + (Predicate == CmpInst::ICMP_EQ)) { Node = MDB.createBranchWeights(LikelyBranchWeight, UnlikelyBranchWeight); - else + ExpNode = MDB.createMisExpect(0, LikelyBranchWeight, UnlikelyBranchWeight); + } else { Node = MDB.createBranchWeights(UnlikelyBranchWeight, LikelyBranchWeight); + ExpNode = MDB.createMisExpect(1, LikelyBranchWeight, UnlikelyBranchWeight); + } - BSI.setMetadata(LLVMContext::MD_prof, Node); + BSI.setMetadata(LLVMContext::MD_misexpect, ExpNode); if (CmpI) CmpI->setOperand(0, ArgValue); else BSI.setCondition(ArgValue); + + misexpect::checkClangInstrumentation(BSI); + + BSI.setMetadata(LLVMContext::MD_prof, Node); + return true; } Index: llvm/lib/Transforms/Utils/CMakeLists.txt =================================================================== --- llvm/lib/Transforms/Utils/CMakeLists.txt +++ llvm/lib/Transforms/Utils/CMakeLists.txt @@ -40,6 +40,7 @@ LowerSwitch.cpp Mem2Reg.cpp MetaRenamer.cpp + MisExpect.cpp ModuleUtils.cpp NameAnonGlobals.cpp PredicateInfo.cpp Index: llvm/lib/Transforms/Utils/MisExpect.cpp =================================================================== --- /dev/null +++ llvm/lib/Transforms/Utils/MisExpect.cpp @@ -0,0 +1,125 @@ +//===--- MisExpect.cpp - Check Use of __builtin_expect() with PGO data ----===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This contains code to emit warnings for potentially incorrect usage of +// __builtin_expect(). This utility extracts the threshold values from metadata +// associated with the instrumented Branch or Switch. The threshold values are +// then used to determin if a warning would be emmited. +// +// MisExpect metadata is generated when llvm.expect intrinsics are lowered see +// LowerExpectIntrinsic.cpp +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Utils/MisExpect.h" +#include "llvm/ADT/Twine.h" +#include "llvm/IR/DiagnosticInfo.h" +#include "llvm/IR/Instruction.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/Support/BranchProbability.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/FormatVariadic.h" +#include +#include + +#define DEBUG_TYPE "misexpect" + +using namespace llvm; +using namespace misexpect; + +namespace { + +Instruction *getOprndOrInst(Instruction *I) { + Instruction *Ret = nullptr; + if (auto B = dyn_cast(I)) { + Ret = dyn_cast(B->getCondition()); + } else if (auto S = dyn_cast(I)) { + Ret = dyn_cast(S->getCondition()); + } + return Ret ? Ret : I; +} + +void emitMisexpectDiagnostic(Instruction *I, LLVMContext &Ctx, + double PercentageCorrect) { + auto PerString = formatv("{0:P}", PercentageCorrect); + Twine Msg(PerString); + Instruction *Cond = getOprndOrInst(I); + Ctx.diagnose(DiagnosticInfoMisExpect(Cond, Msg)); +} + +} // namespace + +namespace llvm { +namespace misexpect { + +void verifyMisExpect(Instruction *I, const SmallVector &Weights, + LLVMContext &Ctx) { + if (auto *MisExpectData = I->getMetadata(LLVMContext::MD_misexpect)) { + auto *MisExpectDataName = dyn_cast(MisExpectData->getOperand(0)); + if (MisExpectDataName && + MisExpectDataName->getString().equals("misexpect")) { + LLVM_DEBUG(llvm::dbgs() << "------------------\n"); + LLVM_DEBUG(llvm::dbgs() + << "Function: " << I->getFunction()->getName() << "\n"); + LLVM_DEBUG(llvm::dbgs() << "Instruction: " << *I << ":\n"); + LLVM_DEBUG(for (int Idx = 0, Size = Weights.size(); Idx < Size; ++Idx) { + llvm::dbgs() << "Weights[" << Idx << "] = " << Weights[Idx] << "\n"; + }); + LLVM_DEBUG(llvm::dbgs() << "------------------\n"); + + // extract values from misexpect metadata + auto *C = mdconst::dyn_extract(MisExpectData->getOperand(1)); + auto *L = mdconst::dyn_extract(MisExpectData->getOperand(2)); + auto *U = mdconst::dyn_extract(MisExpectData->getOperand(3)); + + uint64_t Index = C->getValue().getZExtValue(); + uint64_t LikelyBranchWeight = L->getValue().getZExtValue(); + uint64_t UnlikelyBranchWeight = U->getValue().getZExtValue(); + uint64_t ProfileCount = Weights[Index]; + uint64_t CaseTotal = + std::accumulate(Weights.begin(), Weights.end(), (uint64_t)0, + [](uint64_t W1, uint64_t W2) { return W1 + W2; }); + int NumUnlikelyTargets = Weights.size() - 2; + + const uint64_t TotalBranchWeight = + LikelyBranchWeight + (UnlikelyBranchWeight * NumUnlikelyTargets); + + double Percentage = ((double)ProfileCount / (double)CaseTotal); + + const llvm::BranchProbability LikelyThreshold(LikelyBranchWeight, + TotalBranchWeight); + auto ScaledThreshold = LikelyThreshold.scale(CaseTotal); + + if (ProfileCount < ScaledThreshold) + emitMisexpectDiagnostic(I, Ctx, Percentage); + } + } +} + +void checkClangInstrumentation(Instruction &I) { + if (auto *MD = I.getMetadata(LLVMContext::MD_prof)) { + unsigned NOps = MD->getNumOperands(); + // Operand 0 is a string tag "VP": + if (MDString *Tag = cast(MD->getOperand(0))) { + if (NOps > 1 && Tag->getString().equals("branch_weights")) { + SmallVector RealWeights(NOps - 1); + for (unsigned i = 1; i < NOps; i++) { + ConstantInt *Value = + mdconst::dyn_extract(MD->getOperand(i)); + RealWeights[i - 1] = Value->getZExtValue(); + } + misexpect::verifyMisExpect(&I, RealWeights, I.getContext()); + } + } + } +} + +} // namespace misexpect +} // namespace llvm +#undef DEBUG_TYPE Index: llvm/test/ThinLTO/X86/lazyload_metadata.ll =================================================================== --- llvm/test/ThinLTO/X86/lazyload_metadata.ll +++ llvm/test/ThinLTO/X86/lazyload_metadata.ll @@ -10,13 +10,13 @@ ; RUN: llvm-lto -thinlto-action=import %t2.bc -thinlto-index=%t3.bc \ ; RUN: -o /dev/null -stats \ ; RUN: 2>&1 | FileCheck %s -check-prefix=LAZY -; LAZY: 61 bitcode-reader - Number of Metadata records loaded +; LAZY: 63 bitcode-reader - Number of Metadata records loaded ; LAZY: 2 bitcode-reader - Number of MDStrings loaded ; RUN: llvm-lto -thinlto-action=import %t2.bc -thinlto-index=%t3.bc \ ; RUN: -o /dev/null -disable-ondemand-mds-loading -stats \ ; RUN: 2>&1 | FileCheck %s -check-prefix=NOTLAZY -; NOTLAZY: 70 bitcode-reader - Number of Metadata records loaded +; NOTLAZY: 72 bitcode-reader - Number of Metadata records loaded ; NOTLAZY: 7 bitcode-reader - Number of MDStrings loaded Index: llvm/test/Transforms/LowerExpectIntrinsic/basic.ll =================================================================== --- llvm/test/Transforms/LowerExpectIntrinsic/basic.ll +++ llvm/test/Transforms/LowerExpectIntrinsic/basic.ll @@ -138,7 +138,7 @@ %conv1 = sext i32 %conv to i64 %expval = call i64 @llvm.expect.i64(i64 %conv1, i64 0) %tobool = icmp ne i64 %expval, 0 -; CHECK: !prof !1 +; CHECK: !prof !2 ; CHECK-NOT: @llvm.expect br i1 %tobool, label %if.then, label %if.end @@ -165,7 +165,7 @@ %tmp = load i32, i32* %x.addr, align 4 %conv = sext i32 %tmp to i64 %expval = call i64 @llvm.expect.i64(i64 %conv, i64 1) -; CHECK: !prof !2 +; CHECK: !prof !4 ; CHECK-NOT: @llvm.expect switch i64 %expval, label %sw.epilog [ i64 1, label %sw.bb @@ -194,7 +194,7 @@ %tmp = load i32, i32* %x.addr, align 4 %conv = sext i32 %tmp to i64 %expval = call i64 @llvm.expect.i64(i64 %conv, i64 1) -; CHECK: !prof !3 +; CHECK: !prof !5 ; CHECK-NOT: @llvm.expect switch i64 %expval, label %sw.epilog [ i64 2, label %sw.bb @@ -278,7 +278,7 @@ %t7 = call i64 @llvm.expect.i64(i64 %t6, i64 0) %t8 = icmp ne i64 %t7, 0 %t9 = select i1 %t8, i32 1, i32 2 -; CHECK: select{{.*}}, !prof !1 +; CHECK: select{{.*}}, !prof !2 ret i32 %t9 } @@ -286,6 +286,6 @@ declare i1 @llvm.expect.i1(i1, i1) nounwind readnone ; CHECK: !0 = !{!"branch_weights", i32 2000, i32 1} -; CHECK: !1 = !{!"branch_weights", i32 1, i32 2000} -; CHECK: !2 = !{!"branch_weights", i32 1, i32 2000, i32 1} -; CHECK: !3 = !{!"branch_weights", i32 2000, i32 1, i32 1} +; CHECK: !2 = !{!"branch_weights", i32 1, i32 2000} +; CHECK: !4 = !{!"branch_weights", i32 1, i32 2000, i32 1} +; CHECK: !5 = !{!"branch_weights", i32 2000, i32 1, i32 1}