Index: llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp =================================================================== --- llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp +++ llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp @@ -544,6 +544,12 @@ const std::string infoString() const { return (Twine("Index=") + Twine(Index)).str(); } + + // Empty function -- only applicable to UseBBInfo. + void addOutEdge(PGOEdge *E LLVM_ATTRIBUTE_UNUSED) {} + + // Empty function -- only applicable to UseBBInfo. + void addInEdge(PGOEdge *E LLVM_ATTRIBUTE_UNUSED) {} }; // This class implements the CFG edges. Note the CFG can be a multi-graph. @@ -748,7 +754,7 @@ } // Collect all the BBs that will be instruments and return them in -// InstrumentBBs. +// InstrumentBBs and setup InEdges/OutEdge for UseBBInfo. template void FuncPGOInstrumentation::getInstrumentBBs( std::vector &InstrumentBBs) { @@ -763,6 +769,18 @@ if (InstrBB) InstrumentBBs.push_back(InstrBB); } + + // Set up InEdges/OutEdges for all BBs. + for (auto &E : MST.AllEdges) { + if (E->Removed) + continue; + const BasicBlock *SrcBB = E->SrcBB; + const BasicBlock *DestBB = E->DestBB; + BBInfo &SrcInfo = getBBInfo(SrcBB); + BBInfo &DestInfo = getBBInfo(DestBB); + SrcInfo.addOutEdge(E.get()); + DestInfo.addInEdge(E.get()); + } } // Given a CFG E to be instrumented, find which BB to place the instrumented @@ -780,19 +798,25 @@ if (DestBB == nullptr) return SrcBB; + auto canInstrument = [this](BasicBlock *BB) -> BasicBlock * { + // There are basic blocks (such as catchswitch) cannot be instrumented. + // If the returned first insertion point is the end of BB, skip this BB. + if (BB->getFirstInsertionPt() == BB->end()) { + LLVM_DEBUG(dbgs() << "Cannot instrument BB index=" << getBBInfo(BB).Index + << "\n"); + return nullptr; + } + return BB; + }; + // Instrument the SrcBB if it has a single successor, // otherwise, the DestBB if this is not a critical edge. Instruction *TI = SrcBB->getTerminator(); if (TI->getNumSuccessors() <= 1) - return SrcBB; + return canInstrument(SrcBB); if (!E->IsCritical) - return DestBB; + return canInstrument(DestBB); - // For a critical edge, we have to split. Instrument the newly - // created BB. - IsCS ? NumOfCSPGOSplit++ : NumOfPGOSplit++; - LLVM_DEBUG(dbgs() << "Split critical edge: " << getBBInfo(SrcBB).Index - << " --> " << getBBInfo(DestBB).Index << "\n"); unsigned SuccNum = GetSuccessorNumber(SrcBB, DestBB); BasicBlock *InstrBB = SplitCriticalEdge(TI, SuccNum); if (!InstrBB) { @@ -800,6 +824,11 @@ dbgs() << "Fail to split critical edge: not instrument this edge.\n"); return nullptr; } + // For a critical edge, we have to split. Instrument the newly + // created BB. + IsCS ? NumOfCSPGOSplit++ : NumOfPGOSplit++; + LLVM_DEBUG(dbgs() << "Split critical edge: " << getBBInfo(SrcBB).Index + << " --> " << getBBInfo(DestBB).Index << "\n"); // Need to add two new edges. First one: Add new edge of SrcBB->InstrBB. MST.addEdge(SrcBB, InstrBB, 0); // Second one: Add new edge of InstrBB->DestBB. @@ -807,7 +836,7 @@ NewEdge1.InMST = true; E->Removed = true; - return InstrBB; + return canInstrument(InstrBB); } // Visit all edge and instrument the edges not in MST, and do value profiling. @@ -925,6 +954,18 @@ return BBInfo::infoString(); return (Twine(BBInfo::infoString()) + " Count=" + Twine(CountValue)).str(); } + + // Add an OutEdge and update the edge count. + void addOutEdge(PGOUseEdge *E) { + OutEdges.push_back(E); + UnknownCountOutEdge++; + } + + // Add an InEdge and update the edge count. + void addInEdge(PGOUseEdge *E) { + InEdges.push_back(E); + UnknownCountInEdge++; + } }; } // end anonymous namespace @@ -1069,24 +1110,50 @@ if (NumCounters != CountFromProfile.size()) { return false; } + // Set the profile count to the Instrumented BBs. uint32_t I = 0; for (BasicBlock *InstrBB : InstrumentBBs) { uint64_t CountValue = CountFromProfile[I++]; UseBBInfo &Info = getBBInfo(InstrBB); Info.setBBInfoCount(CountValue); - // If only one in-edge, the edge profile count should be the same as BB - // profile count. - if (Info.InEdges.size() == 1) { - Info.InEdges[0]->setEdgeCount(CountValue); - } + } + ProfileCountSize = CountFromProfile.size(); + CountPosition = I; + + // Set the edge count and update the count of unknown edges for BBs. + auto setEdgeCount = [this](PGOUseEdge *E, uint64_t Value) -> void { + E->setEdgeCount(Value); + this->getBBInfo(E->SrcBB).UnknownCountOutEdge--; + this->getBBInfo(E->DestBB).UnknownCountInEdge--; + }; + + // Set the profile count the Instrumented edges. There are BBs that not in + // MST but not instrumented. Need to set the edge count value so that we can + // populate the profile counts later. + for (auto &E : FuncInfo.MST.AllEdges) { + if (E->Removed || E->InMST) + continue; + const BasicBlock *SrcBB = E->SrcBB; + UseBBInfo &SrcInfo = getBBInfo(SrcBB); + // If only one out-edge, the edge profile count should be the same as BB // profile count. - if (Info.OutEdges.size() == 1) { - Info.OutEdges[0]->setEdgeCount(CountValue); + if (SrcInfo.CountValid && SrcInfo.OutEdges.size() == 1) + setEdgeCount(E.get(), SrcInfo.CountValue); + else { + const BasicBlock *DestBB = E->DestBB; + UseBBInfo &DestInfo = getBBInfo(DestBB); + // If only one in-edge, the edge profile count should be the same as BB + // profile count. + if (DestInfo.CountValid && DestInfo.InEdges.size() == 1) + setEdgeCount(E.get(), DestInfo.CountValue); } + if (E->CountValid) + continue; + // E's count should have been set from profile. If not, this meenas E skips + // the instrumentation. We set the count to 0. + setEdgeCount(E.get(), 0); } - ProfileCountSize = CountFromProfile.size(); - CountPosition = I; return true; } @@ -1180,26 +1247,6 @@ // Populate the counters from instrumented BBs to all BBs. // In the end of this operation, all BBs should have a valid count value. void PGOUseFunc::populateCounters() { - // First set up Count variable for all BBs. - for (auto &E : FuncInfo.MST.AllEdges) { - if (E->Removed) - continue; - - const BasicBlock *SrcBB = E->SrcBB; - const BasicBlock *DestBB = E->DestBB; - UseBBInfo &SrcInfo = getBBInfo(SrcBB); - UseBBInfo &DestInfo = getBBInfo(DestBB); - SrcInfo.OutEdges.push_back(E.get()); - DestInfo.InEdges.push_back(E.get()); - SrcInfo.UnknownCountOutEdge++; - DestInfo.UnknownCountInEdge++; - - if (!E->CountValid) - continue; - DestInfo.UnknownCountInEdge--; - SrcInfo.UnknownCountOutEdge--; - } - bool Changes = true; unsigned NumPasses = 0; while (Changes) { Index: llvm/test/Transforms/PGOProfile/Inputs/PR41279.proftext =================================================================== --- /dev/null +++ llvm/test/Transforms/PGOProfile/Inputs/PR41279.proftext @@ -0,0 +1,9 @@ +:ir +foo +60927483247 +4 +3 +2 +3 +2 + Index: llvm/test/Transforms/PGOProfile/Inputs/PR41279_2.proftext =================================================================== --- /dev/null +++ llvm/test/Transforms/PGOProfile/Inputs/PR41279_2.proftext @@ -0,0 +1,7 @@ +:ir +f +62077759478 +2 +3 +2 + Index: llvm/test/Transforms/PGOProfile/PR41279.ll =================================================================== --- llvm/test/Transforms/PGOProfile/PR41279.ll +++ llvm/test/Transforms/PGOProfile/PR41279.ll @@ -1,6 +1,9 @@ ; Test that instrumentaiton works fine for the case of failing the split critical edges. ; RUN: opt < %s -pgo-instr-gen -S | FileCheck %s --check-prefix=GEN ; RUN: opt < %s -passes=pgo-instr-gen -S | FileCheck %s --check-prefix=GEN +; RUN: llvm-profdata merge %S/Inputs/PR41279.proftext -o %t.profdata +; RUN: opt < %s -pgo-instr-use -pgo-test-profile-file=%t.profdata -S | FileCheck %s --check-prefix=USE +; RUN: opt < %s -passes=pgo-instr-use -pgo-test-profile-file=%t.profdata -S | FileCheck %s --check-prefix=USE declare void @f3({ i8*, i64 }*, { i8*, i64 }*, i64) declare { i8*, i64 } @f0({ i8*, i64 }*) @@ -9,17 +12,22 @@ declare void @invok1({ i8*, i64 }*, { i8*, i64 }*, i64) declare i32 @__CxxFrameHandler3(...) -define internal void @foo({ i8*, i64 }*, { i8*, i64 }*) personality i32 (...)* @__CxxFrameHandler3 { +define void @foo({ i8*, i64 }*, { i8*, i64 }*) personality i32 (...)* @__CxxFrameHandler3 { +; USE-LABEL: @foo +; USE-SAME: !prof ![[FUNC_ENTRY_COUNT:[0-9]+]] + %3 = alloca i8, align 1 store i8 0, i8* %3, align 1 %4 = call i64 @f1() %5 = icmp ult i64 %4, 32 br i1 %5, label %7, label %13 +; USE: br i1 %5, label %7, label %13 +; USE-SAME: !prof ![[BW_ENTRY1:[0-9]+]] 6: cleanupret from %17 unwind to caller ; GEN: 6: -; GEN: call void @llvm.instrprof.increment(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @__profn__stdin__foo, i32 0, i32 0), i64 60927483247, i32 4, i32 2) +; GEN: call void @llvm.instrprof.increment(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @__profn_foo, i32 0, i32 0), i64 60927483247, i32 4, i32 2) 7: store i8 1, i8* %3, align 1 @@ -42,13 +50,13 @@ store i8 0, i8* %3, align 1 br label %14 ; GEN: 12: -; GEN: call void @llvm.instrprof.increment(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @__profn__stdin__foo, i32 0, i32 0), i64 60927483247, i32 4, i32 1) +; GEN: call void @llvm.instrprof.increment(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @__profn_foo, i32 0, i32 0), i64 60927483247, i32 4, i32 1) 13: call void @f3({ i8*, i64 }* %0, { i8*, i64 }* %1, i64 1) br label %14 ; GEN: 13: -; GEN: call void @llvm.instrprof.increment(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @__profn__stdin__foo, i32 0, i32 0), i64 60927483247, i32 4, i32 0) +; GEN: call void @llvm.instrprof.increment(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @__profn_foo, i32 0, i32 0), i64 60927483247, i32 4, i32 0) 14: ret void @@ -57,11 +65,20 @@ store i8 0, i8* %3, align 1 br label %6 ; GEN: 15: -; GEN: call void @llvm.instrprof.increment(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @__profn__stdin__foo, i32 0, i32 0), i64 60927483247, i32 4, i32 3) +; GEN: call void @llvm.instrprof.increment(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @__profn_foo, i32 0, i32 0), i64 60927483247, i32 4, i32 3) 16: %17 = cleanuppad within none [] %18 = load i8, i8* %3, align 1 %19 = trunc i8 %18 to i1 br i1 %19, label %15, label %6 +; USE: br i1 %19, label %15, label %6 +; USE-SAME: !prof ![[BW_ENTRY2:[0-9]+]] } + +; USE-DAG: {{![0-9]+}} = !{i32 1, !"ProfileSummary", {{![0-9]+}}} +; USE-DAG: {{![0-9]+}} = !{!"DetailedSummary", {{![0-9]+}}} +; USE-DAG: ![[FUNC_ENTRY_COUNT]] = !{!"function_entry_count", i64 8} +; USE_DAG: ![[BW_ENTRY1]] = !{!"branch_weights", i32 5, i32 3} +; USE_DAG: ![[BW_ENTRY2]] = !{!"branch_weights", i32 2, i32 1} + Index: llvm/test/Transforms/PGOProfile/PR41279_2.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/PGOProfile/PR41279_2.ll @@ -0,0 +1,68 @@ +; Test that instrumentaiton works fine for the case of catchswitch stmts. +; RUN: opt < %s -pgo-instr-gen -S | FileCheck %s --check-prefix=GEN +; RUN: opt < %s -passes=pgo-instr-gen -S | FileCheck %s --check-prefix=GEN +; RUN: llvm-profdata merge %S/Inputs/PR41279_2.proftext -o %t.profdata +; RUN: opt < %s -pgo-instr-use -pgo-test-profile-file=%t.profdata -S | FileCheck %s --check-prefix=USE +; RUN: opt < %s -passes=pgo-instr-use -pgo-test-profile-file=%t.profdata -S | FileCheck %s --check-prefix=USE + + +define dso_local void @f() personality i8* bitcast (i32 (...)* @__C_specific_handler to i8*) { +; USE-LABEL: @f +; USE-SAME: !prof ![[FUNC_ENTRY_COUNT:[0-9]+]] +; USE-DAG: {{![0-9]+}} = !{i32 1, !"ProfileSummary", {{![0-9]+}}} +; USE-DAG: {{![0-9]+}} = !{!"DetailedSummary", {{![0-9]+}}} +; USE-DAG: ![[FUNC_ENTRY_COUNT]] = !{!"function_entry_count", i64 5} +entry: + %__exception_code = alloca i32, align 4 + %__exception_code2 = alloca i32, align 4 + invoke void @f() #2 + to label %invoke.cont unwind label %catch.dispatch + +catch.dispatch: + %0 = catchswitch within none [label %__except] unwind to caller + +__except: + %1 = catchpad within %0 [i8* null] + catchret from %1 to label %__except1 + +__except1: + %2 = call i32 @llvm.eh.exceptioncode(token %1) + store i32 %2, i32* %__exception_code, align 4 + br label %__try.cont7 +;GEN: _except1: +;GEN: call void @llvm.instrprof.increment(i8* getelementptr inbounds ([1 x i8], [1 x i8]* @__profn_f, i32 0, i32 0), i64 62077759478, i32 2, i32 1) + +invoke.cont: + br label %__try.cont + +__try.cont: + invoke void @f() + to label %invoke.cont3 unwind label %catch.dispatch4 + +catch.dispatch4: + %3 = catchswitch within none [label %__except5] unwind to caller +; GEN: catch.dispatch4: +; GEN-NOT: call void @llvm.instrprof.increment + +__except5: + %4 = catchpad within %3 [i8* null] + catchret from %4 to label %__except6 + +__except6: + %5 = call i32 @llvm.eh.exceptioncode(token %4) + store i32 %5, i32* %__exception_code2, align 4 + br label %__try.cont7 + +__try.cont7: + ret void + +invoke.cont3: + br label %__try.cont7 +;GEN: invoke.cont3: +;GEN: call void @llvm.instrprof.increment(i8* getelementptr inbounds ([1 x i8], [1 x i8]* @__profn_f, i32 0, i32 0), i64 62077759478, i32 2, i32 0) + +} + +declare dso_local i32 @__C_specific_handler(...) + +declare i32 @llvm.eh.exceptioncode(token)