Index: llvm/trunk/lib/Transforms/Instrumentation/CFGMST.h =================================================================== --- llvm/trunk/lib/Transforms/Instrumentation/CFGMST.h +++ llvm/trunk/lib/Transforms/Instrumentation/CFGMST.h @@ -46,6 +46,10 @@ // This map records the auxiliary information for each BB. DenseMap> BBInfos; + // Whehter the function has an exit block with no successors. + // (For function with an infinite loop, this block may be absent) + bool ExitBlockFound = false; + // Find the root group of the G and compress the path from G to the root. BBInfo *findAndCompressGroup(BBInfo *G) { if (G->Group != G) @@ -95,14 +99,20 @@ void buildEdges() { DEBUG(dbgs() << "Build Edge on " << F.getName() << "\n"); - const BasicBlock *BB = &(F.getEntryBlock()); + const BasicBlock *Entry = &(F.getEntryBlock()); uint64_t EntryWeight = (BFI != nullptr ? BFI->getEntryFreq() : 2); + Edge *EntryIncoming = nullptr, *EntryOutgoing = nullptr, + *ExitOutgoing = nullptr, *ExitIncoming = nullptr; + uint64_t MaxEntryOutWeight = 0, MaxExitOutWeight = 0, MaxExitInWeight = 0; + // Add a fake edge to the entry. - addEdge(nullptr, BB, EntryWeight); + EntryIncoming = &addEdge(nullptr, Entry, EntryWeight); + DEBUG(dbgs() << " Edge: from fake node to " << Entry->getName() + << " w = " << EntryWeight << "\n"); // Special handling for single BB functions. - if (succ_empty(BB)) { - addEdge(BB, nullptr, EntryWeight); + if (succ_empty(Entry)) { + addEdge(Entry, nullptr, EntryWeight); return; } @@ -126,16 +136,62 @@ } if (BPI != nullptr) Weight = BPI->getEdgeProbability(&*BB, TargetBB).scale(scaleFactor); - addEdge(&*BB, TargetBB, Weight).IsCritical = Critical; + auto *E = &addEdge(&*BB, TargetBB, Weight); + E->IsCritical = Critical; DEBUG(dbgs() << " Edge: from " << BB->getName() << " to " << TargetBB->getName() << " w=" << Weight << "\n"); + + // Keep track of entry/exit edges: + if (&*BB == Entry) { + if (Weight > MaxEntryOutWeight) { + MaxEntryOutWeight = Weight; + EntryOutgoing = E; + } + } + + auto *TargetTI = TargetBB->getTerminator(); + if (TargetTI && !TargetTI->getNumSuccessors()) { + if (Weight > MaxExitInWeight) { + MaxExitInWeight = Weight; + ExitIncoming = E; + } + } } } else { - addEdge(&*BB, nullptr, BBWeight); - DEBUG(dbgs() << " Edge: from " << BB->getName() << " to exit" + ExitBlockFound = true; + Edge *ExitO = &addEdge(&*BB, nullptr, BBWeight); + if (BBWeight > MaxExitOutWeight) { + MaxExitOutWeight = BBWeight; + ExitOutgoing = ExitO; + } + DEBUG(dbgs() << " Edge: from " << BB->getName() << " to fake exit" << " w = " << BBWeight << "\n"); } } + + // Entry/exit edge adjustment heurisitic: + // prefer instrumenting entry edge over exit edge + // if possible. Those exit edges may never have a chance to be + // executed (for instance the program is an event handling loop) + // before the profile is asynchronously dumped. + // + // If EntryIncoming and ExitOutgoing has similar weight, make sure + // ExitOutging is selected as the min-edge. Similarly, if EntryOutgoing + // and ExitIncoming has similar weight, make sure ExitIncoming becomes + // the min-edge. + uint64_t EntryInWeight = EntryWeight; + + if (EntryInWeight >= MaxExitOutWeight && + EntryInWeight * 2 < MaxExitOutWeight * 3) { + EntryIncoming->Weight = MaxExitOutWeight; + ExitOutgoing->Weight = EntryInWeight + 1; + } + + if (MaxEntryOutWeight >= MaxExitInWeight && + MaxEntryOutWeight * 2 < MaxExitInWeight * 3) { + EntryOutgoing->Weight = MaxExitInWeight; + ExitIncoming->Weight = MaxEntryOutWeight + 1; + } } // Sort CFG edges based on its weight. @@ -167,6 +223,10 @@ for (auto &Ei : AllEdges) { if (Ei->Removed) continue; + // If we detect infinite loops, force + // instrumenting the entry edge: + if (!ExitBlockFound && Ei->SrcBB == nullptr) + continue; if (unionGroups(Ei->SrcBB, Ei->DestBB)) Ei->InMST = true; } Index: llvm/trunk/test/Transforms/PGOProfile/Inputs/landingpad.proftext =================================================================== --- llvm/trunk/test/Transforms/PGOProfile/Inputs/landingpad.proftext +++ llvm/trunk/test/Transforms/PGOProfile/Inputs/landingpad.proftext @@ -11,6 +11,6 @@ bar 24868915205 2 -1 +3 2 Index: llvm/trunk/test/Transforms/PGOProfile/Inputs/noreturncall.proftext =================================================================== --- llvm/trunk/test/Transforms/PGOProfile/Inputs/noreturncall.proftext +++ llvm/trunk/test/Transforms/PGOProfile/Inputs/noreturncall.proftext @@ -6,6 +6,6 @@ # Num Counters: 3 # Counter Values: -20 +21 21 0 Index: llvm/trunk/test/Transforms/PGOProfile/branch1.ll =================================================================== --- llvm/trunk/test/Transforms/PGOProfile/branch1.ll +++ llvm/trunk/test/Transforms/PGOProfile/branch1.ll @@ -32,7 +32,7 @@ ; USE-SAME: !prof ![[FUNC_ENTRY_COUNT:[0-9]+]] entry: ; GEN: entry: -; GEN-NOT: llvm.instrprof.increment +; GEN: call void @llvm.instrprof.increment(i8* getelementptr inbounds ([9 x i8], [9 x i8]* @__profn_test_br_1, i32 0, i32 0), i64 25571299074, i32 2, i32 0) %cmp = icmp sgt i32 %i, 0 br i1 %cmp, label %if.then, label %if.end ; USE: br i1 %cmp, label %if.then, label %if.end @@ -50,7 +50,8 @@ if.end: ; GEN: if.end: -; GEN: call void @llvm.instrprof.increment(i8* getelementptr inbounds ([9 x i8], [9 x i8]* @__profn_test_br_1, i32 0, i32 0), i64 25571299074, i32 2, i32 0) +; GEN-NOT: llvm.instrprof.increment +; GEN: ret i32 %retv = phi i32 [ %add, %if.then ], [ %i, %entry ] ret i32 %retv } Index: llvm/trunk/test/Transforms/PGOProfile/counter_promo.ll =================================================================== --- llvm/trunk/test/Transforms/PGOProfile/counter_promo.ll +++ llvm/trunk/test/Transforms/PGOProfile/counter_promo.ll @@ -5,6 +5,9 @@ define void @foo(i32 %n, i32 %N) { ; PROMO-LABEL: @foo +; PROMO: {{.*}} = load {{.*}} @__profc_foo{{.*}} 3) +; PROMO-NEXT: add +; PROMO-NEXT: store {{.*}}@__profc_foo{{.*}}3) bb: %tmp = add nsw i32 %n, 1 %tmp1 = add nsw i32 %n, -1 @@ -57,9 +60,6 @@ ; ATOMIC_PROMO: atomicrmw add {{.*}} @__profc_foo{{.*}}0), i64 %[[LIVEOUT1]] seq_cst ; ATOMIC_PROMO-NEXT: atomicrmw add {{.*}} @__profc_foo{{.*}}1), i64 %[[LIVEOUT2]] seq_cst ; ATOMIC_PROMO-NEXT: atomicrmw add {{.*}} @__profc_foo{{.*}}2), i64 %[[LIVEOUT3]] seq_cst -; PROMO: {{.*}} = load {{.*}} @__profc_foo{{.*}} 3) -; PROMO-NEXT: add -; PROMO-NEXT: store {{.*}}@__profc_foo{{.*}}3) ; PROMO-NOT: @__profc_foo Index: llvm/trunk/test/Transforms/PGOProfile/infinite_loop_gen.ll =================================================================== --- llvm/trunk/test/Transforms/PGOProfile/infinite_loop_gen.ll +++ llvm/trunk/test/Transforms/PGOProfile/infinite_loop_gen.ll @@ -0,0 +1,17 @@ +; RUN: opt < %s -pgo-instr-gen -S -o - | FileCheck %s + +define void @foo() { +entry: + br label %while.body + ; CHECK: llvm.instrprof.increment + + while.body: ; preds = %entry, %while.body + ; CHECK: llvm.instrprof.increment + call void (...) @bar() #2 + br label %while.body +} + +declare void @bar(...) + +attributes #0 = { nounwind } + Index: llvm/trunk/test/Transforms/PGOProfile/landingpad.ll =================================================================== --- llvm/trunk/test/Transforms/PGOProfile/landingpad.ll +++ llvm/trunk/test/Transforms/PGOProfile/landingpad.ll @@ -16,7 +16,7 @@ define i32 @bar(i32 %i) { entry: ; GEN: entry: -; GEN-NOT: call void @llvm.instrprof.increment +; GEN: call void @llvm.instrprof.increment(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @__profn_bar, i32 0, i32 0), i64 24868915205, i32 2, i32 0) %rem = srem i32 %i, 3 %tobool = icmp ne i32 %rem, 0 br i1 %tobool, label %if.then, label %if.end @@ -34,7 +34,8 @@ if.end: ; GEN: if.end: -; GEN: call void @llvm.instrprof.increment(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @__profn_bar, i32 0, i32 0), i64 24868915205, i32 2, i32 0) +; GEN-NOT: call void @llvm.instrprof.increment +; GEN: ret i32 ret i32 0 } Index: llvm/trunk/test/Transforms/PGOProfile/loop1.ll =================================================================== --- llvm/trunk/test/Transforms/PGOProfile/loop1.ll +++ llvm/trunk/test/Transforms/PGOProfile/loop1.ll @@ -13,7 +13,7 @@ define i32 @test_simple_for(i32 %n) { entry: ; GEN: entry: -; GEN-NOT: call void @llvm.instrprof.increment +; GEN: call void @llvm.instrprof.increment(i8* getelementptr inbounds ([15 x i8], [15 x i8]* @__profn_test_simple_for, i32 0, i32 0), i64 34137660316, i32 2, i32 1) br label %for.cond for.cond: @@ -41,6 +41,7 @@ for.end: ; GEN: for.end: -; GEN: call void @llvm.instrprof.increment(i8* getelementptr inbounds ([15 x i8], [15 x i8]* @__profn_test_simple_for, i32 0, i32 0), i64 34137660316, i32 2, i32 1) +; GEN-NOT: call void @llvm.instrprof.increment +; GEN: ret i32 ret i32 %sum } Index: llvm/trunk/test/Transforms/PGOProfile/loop2.ll =================================================================== --- llvm/trunk/test/Transforms/PGOProfile/loop2.ll +++ llvm/trunk/test/Transforms/PGOProfile/loop2.ll @@ -13,7 +13,7 @@ define i32 @test_nested_for(i32 %r, i32 %s) { entry: ; GEN: entry: -; GEN-NOT: call void @llvm.instrprof.increment +; GEN: call void @llvm.instrprof.increment(i8* getelementptr inbounds ([15 x i8], [15 x i8]* @__profn_test_nested_for, i32 0, i32 0), i64 53929068288, i32 3, i32 2) br label %for.cond.outer for.cond.outer: @@ -65,7 +65,8 @@ for.end.outer: ; GEN: for.end.outer: -; GEN: call void @llvm.instrprof.increment(i8* getelementptr inbounds ([15 x i8], [15 x i8]* @__profn_test_nested_for, i32 0, i32 0), i64 53929068288, i32 3, i32 2) +; GEN-NOT: call void @llvm.instrprof.increment +; GEN: ret i32 ret i32 %sum.0 } Index: llvm/trunk/test/Transforms/PGOProfile/noreturncall.ll =================================================================== --- llvm/trunk/test/Transforms/PGOProfile/noreturncall.ll +++ llvm/trunk/test/Transforms/PGOProfile/noreturncall.ll @@ -42,4 +42,4 @@ ret i32 %mul } ; USE: ![[BW_ENTRY]] = !{!"branch_weights", i32 21, i32 0} -; USE: ![[BW_IF]] = !{!"branch_weights", i32 0, i32 20} +; USE: ![[BW_IF]] = !{!"branch_weights", i32 0, i32 21}