diff --git a/llvm/lib/Transforms/Instrumentation/CFGMST.h b/llvm/lib/Transforms/Instrumentation/CFGMST.h
--- a/llvm/lib/Transforms/Instrumentation/CFGMST.h
+++ b/llvm/lib/Transforms/Instrumentation/CFGMST.h
@@ -20,6 +20,7 @@
 #include "llvm/Analysis/BranchProbabilityInfo.h"
 #include "llvm/Analysis/CFG.h"
 #include "llvm/Support/BranchProbability.h"
+#include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
@@ -28,6 +29,11 @@
 
 #define DEBUG_TYPE "cfgmst"
 
+// NOTE(review): static cl::opt in a header; each including TU gets a copy.
+static llvm::cl::opt<bool> PGOInstrumentEntry(
+    "pgo-instrument-entry", llvm::cl::init(false), llvm::cl::Hidden,
+    llvm::cl::desc("Force to instrument function entry basicblock."));
+
 namespace llvm {
 
 /// An union-find based Minimum Spanning Tree for CFG
@@ -100,8 +106,11 @@
 
     const BasicBlock *Entry = &(F.getEntryBlock());
     uint64_t EntryWeight = (BFI != nullptr ? BFI->getEntryFreq() : 2);
+    // If we want to instrument the entry count, lower the weight to 0.
+    if (PGOInstrumentEntry)
+      EntryWeight = 0;
     Edge *EntryIncoming = nullptr, *EntryOutgoing = nullptr,
-        *ExitOutgoing = nullptr, *ExitIncoming = nullptr;
+         *ExitOutgoing = nullptr, *ExitIncoming = nullptr;
     uint64_t MaxEntryOutWeight = 0, MaxExitOutWeight = 0, MaxExitInWeight = 0;
 
     // Add a fake edge to the entry.
@@ -135,6 +144,10 @@
           }
           if (BPI != nullptr)
             Weight = BPI->getEdgeProbability(&*BB, TargetBB).scale(scaleFactor);
+          // Clamp to a minimum of 1. NOTE(review): presumably so that only the
+          // fake entry edge can carry weight 0 under -pgo-instrument-entry.
+          if (Weight == 0)
+            Weight++;
           auto *E = &addEdge(&*BB, TargetBB, Weight);
           E->IsCritical = Critical;
           LLVM_DEBUG(dbgs() << " Edge: from " << BB->getName() << " to "
@@ -278,6 +291,11 @@
     buildEdges();
     sortEdgesByWeight();
     computeMinimumSpanningTree();
+    // Exchange the first and last edges. NOTE(review): presumably this moves
+    // the zero-weight entry edge to the front so it is assigned counter 0;
+    // confirm against sortEdgesByWeight().
+    if (PGOInstrumentEntry && AllEdges.size() > 1)
+      std::iter_swap(AllEdges.begin(), AllEdges.end() - 1);
   }
 };
 
diff --git a/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp b/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp
--- a/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp
+++ b/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp
@@ -110,6 +110,14 @@
              " for promoted counters only"),
     cl::init(false));
 
+// When set, the counter at index 0 of a function is updated with an atomic
+// RMW add even when Options.Atomic and AtomicCounterUpdateAll are both off.
+cl::opt<bool> AtomicFirstCounter(
+    "atomic-first-counter", cl::ZeroOrMore,
+    cl::desc("Use atomic fetch add for first counter in a function (usually "
+             "the entry counter)"),
+    cl::init(false));
+
 // If the option is not specified, the default behavior about whether
 // counter promotion is done depends on how instrumentaiton lowering
 // pipeline is setup, i.e., the default value of true of this option
@@ -696,7 +704,8 @@
     Addr = Builder.CreateIntToPtr(Add, Int64PtrTy);
   }
 
-  if (Options.Atomic || AtomicCounterUpdateAll) {
+  if (Options.Atomic || AtomicCounterUpdateAll ||
+      (Index == 0 && AtomicFirstCounter)) {
     Builder.CreateAtomicRMW(AtomicRMWInst::Add, Addr, Inc->getStep(),
                             AtomicOrdering::Monotonic);
   } else {
diff --git a/llvm/test/Transforms/PGOProfile/instr_entry_bb.ll b/llvm/test/Transforms/PGOProfile/instr_entry_bb.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/PGOProfile/instr_entry_bb.ll
@@ -0,0 +1,52 @@
+; Check that -pgo-instrument-entry places a counter in the entry block and
+; that -atomic-first-counter lowers counter 0 to an atomicrmw.
+; RUN: opt < %s -pgo-instr-gen -pgo-instrument-entry -S | FileCheck %s --check-prefix=GEN
+; RUN: opt < %s -passes=pgo-instr-gen -pgo-instrument-entry -S | FileCheck %s --check-prefix=GEN
+; RUN: opt < %s -pgo-instr-gen -pgo-instrument-entry -instrprof -atomic-first-counter -S | FileCheck %s --check-prefix=GENA
+; RUN: opt < %s -passes=pgo-instr-gen,instrprof -pgo-instrument-entry -atomic-first-counter -S | FileCheck %s --check-prefix=GENA
+
+; RUN: llvm-profdata merge %S/Inputs/branch2.proftext -o %t.profdata
+; RUN: opt < %s -pgo-instr-use -pgo-test-profile-file=%t.profdata -pgo-instrument-entry -S | FileCheck %s --check-prefix=USE
+; RUN: opt < %s -passes=pgo-instr-use -pgo-test-profile-file=%t.profdata -pgo-instrument-entry -S | FileCheck %s --check-prefix=USE
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; GEN: $__llvm_profile_raw_version = comdat any
+; GEN: @__llvm_profile_raw_version = constant i64 {{[0-9]+}}, comdat
+; GEN: @__profn_test_br_2 = private constant [9 x i8] c"test_br_2"
+
+define i32 @test_br_2(i32 %i) {
+entry:
+; GEN: entry:
+; GEN: call void @llvm.instrprof.increment(i8* getelementptr inbounds ([9 x i8], [9 x i8]* @__profn_test_br_2, i32 0, i32 0), i64 29667547796, i32 2, i32 0)
+; GENA: entry:
+; GENA: %{{[0-9]+}} = atomicrmw add i64* getelementptr inbounds ([2 x i64], [2 x i64]* @__profc_test_br_2, i64 0, i64 0), i64 1 monotonic
+; USE: br i1 %cmp, label %if.then, label %if.else
+; USE-SAME: !prof ![[BW_ENTRY:[0-9]+]]
+; USE: ![[BW_ENTRY]] = !{!"branch_weights", i32 0, i32 1}
+  %cmp = icmp sgt i32 %i, 0
+  br i1 %cmp, label %if.then, label %if.else
+
+if.then:
+; GEN: if.then:
+; GEN-NOT: llvm.instrprof.increment
+  %add = add nsw i32 %i, 2
+  br label %if.end
+
+if.else:
+; GEN: if.else:
+; GEN: call void @llvm.instrprof.increment(i8* getelementptr inbounds ([9 x i8], [9 x i8]* @__profn_test_br_2, i32 0, i32 0), i64 29667547796, i32 2, i32 1)
+; GENA: if.else:
+; GENA: %pgocount = load i64, i64* getelementptr inbounds ([2 x i64], [2 x i64]* @__profc_test_br_2, i64 0, i64 1), align 8
+; GENA: [[V:%[0-9]*]] = add i64 %pgocount, 1
+; GENA: store i64 [[V]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* @__profc_test_br_2, i64 0, i64 1), align 8
+  %sub = sub nsw i32 %i, 2
+  br label %if.end
+
+if.end:
+; GEN: if.end:
+; GEN-NOT: llvm.instrprof.increment
+  %retv = phi i32 [ %add, %if.then ], [ %sub, %if.else ]
+  ret i32 %retv
+; GEN: ret
+}