Index: llvm/trunk/lib/Transforms/Instrumentation/PGOInstrumentation.cpp =================================================================== --- llvm/trunk/lib/Transforms/Instrumentation/PGOInstrumentation.cpp +++ llvm/trunk/lib/Transforms/Instrumentation/PGOInstrumentation.cpp @@ -573,6 +573,10 @@ // The Minimum Spanning Tree of function CFG. CFGMST MST; + // Collect all the BBs that will be instrumented, and store them in + // InstrumentBBs. + void getInstrumentBBs(std::vector &InstrumentBBs); + // Give an edge, find the BB that will be instrumented. // Return nullptr if there is no BB to be instrumented. BasicBlock *getInstrBB(Edge *E); @@ -629,16 +633,6 @@ if (CreateGlobalVar) FuncNameVar = createPGOFuncNameVar(F, FuncName); } - - // Return the number of profile counters needed for the function. - unsigned getNumCounters() { - unsigned NumCounters = 0; - for (auto &E : this->MST.AllEdges) { - if (!E->InMST && !E->Removed) - NumCounters++; - } - return NumCounters + SIVisitor.getNumOfSelectInsts(); - } }; } // end anonymous namespace @@ -753,6 +747,24 @@ } } +// Collect all the BBs that will be instruments and return them in +// InstrumentBBs. +template +void FuncPGOInstrumentation::getInstrumentBBs( + std::vector &InstrumentBBs) { + // Use a worklist as we will update the vector during the iteration. + std::vector EdgeList; + EdgeList.reserve(MST.AllEdges.size()); + for (auto &E : MST.AllEdges) + EdgeList.push_back(E.get()); + + for (auto &E : EdgeList) { + BasicBlock *InstrBB = getInstrBB(E); + if (InstrBB) + InstrumentBBs.push_back(InstrBB); + } +} + // Given a CFG E to be instrumented, find which BB to place the instrumented // code. The function will split the critical edge if necessary. template @@ -783,9 +795,18 @@ << " --> " << getBBInfo(DestBB).Index << "\n"); unsigned SuccNum = GetSuccessorNumber(SrcBB, DestBB); BasicBlock *InstrBB = SplitCriticalEdge(TI, SuccNum); - assert(InstrBB && "Critical edge is not split"); - + if (!InstrBB) { + LLVM_DEBUG( + dbgs() << "Fail to split critical edge: not instrument this edge.\n"); + return nullptr; + } + // Need to add two new edges. First one: Add new edge of SrcBB->InstrBB. + MST.addEdge(SrcBB, InstrBB, 0); + // Second one: Add new edge of InstrBB->DestBB. + Edge &NewEdge1 = MST.addEdge(InstrBB, DestBB, 0); + NewEdge1.InMST = true; E->Removed = true; + return InstrBB; } @@ -801,15 +822,14 @@ FuncPGOInstrumentation FuncInfo(F, ComdatMembers, true, BPI, BFI, IsCS); - unsigned NumCounters = FuncInfo.getNumCounters(); + std::vector InstrumentBBs; + FuncInfo.getInstrumentBBs(InstrumentBBs); + unsigned NumCounters = + InstrumentBBs.size() + FuncInfo.SIVisitor.getNumOfSelectInsts(); uint32_t I = 0; Type *I8PtrTy = Type::getInt8PtrTy(M->getContext()); - for (auto &E : FuncInfo.MST.AllEdges) { - BasicBlock *InstrBB = FuncInfo.getInstrBB(E.get()); - if (!InstrBB) - continue; - + for (auto *InstrBB : InstrumentBBs) { IRBuilder<> Builder(InstrBB, InstrBB->getFirstInsertionPt()); assert(Builder.GetInsertPoint() != InstrBB->end() && "Cannot get the Instrumentation point"); @@ -1039,39 +1059,31 @@ // edges and the BB. Return false on error. bool PGOUseFunc::setInstrumentedCounts( const std::vector &CountFromProfile) { + + std::vector InstrumentBBs; + FuncInfo.getInstrumentBBs(InstrumentBBs); + unsigned NumCounters = + InstrumentBBs.size() + FuncInfo.SIVisitor.getNumOfSelectInsts(); // The number of counters here should match the number of counters // in profile. Return if they mismatch. - if (FuncInfo.getNumCounters() != CountFromProfile.size()) { + if (NumCounters != CountFromProfile.size()) { return false; } - // Use a worklist as we will update the vector during the iteration. - std::vector WorkList; - for (auto &E : FuncInfo.MST.AllEdges) - WorkList.push_back(E.get()); - uint32_t I = 0; - for (auto &E : WorkList) { - BasicBlock *InstrBB = FuncInfo.getInstrBB(E); - if (!InstrBB) - continue; + for (BasicBlock *InstrBB : InstrumentBBs) { uint64_t CountValue = CountFromProfile[I++]; - if (!E->Removed) { - getBBInfo(InstrBB).setBBInfoCount(CountValue); - E->setEdgeCount(CountValue); - continue; + UseBBInfo &Info = getBBInfo(InstrBB); + Info.setBBInfoCount(CountValue); + // If only one in-edge, the edge profile count should be the same as BB + // profile count. + if (Info.InEdges.size() == 1) { + Info.InEdges[0]->setEdgeCount(CountValue); + } + // If only one out-edge, the edge profile count should be the same as BB + // profile count. + if (Info.OutEdges.size() == 1) { + Info.OutEdges[0]->setEdgeCount(CountValue); } - - // Need to add two new edges. - BasicBlock *SrcBB = const_cast(E->SrcBB); - BasicBlock *DestBB = const_cast(E->DestBB); - // Add new edge of SrcBB->InstrBB. - PGOUseEdge &NewEdge = FuncInfo.MST.addEdge(SrcBB, InstrBB, 0); - NewEdge.setEdgeCount(CountValue); - // Add new edge of InstrBB->DestBB. - PGOUseEdge &NewEdge1 = FuncInfo.MST.addEdge(InstrBB, DestBB, 0); - NewEdge1.setEdgeCount(CountValue); - NewEdge1.InMST = true; - getBBInfo(InstrBB).setBBInfoCount(CountValue); } ProfileCountSize = CountFromProfile.size(); CountPosition = I; Index: llvm/trunk/test/Transforms/PGOProfile/PR41279.ll =================================================================== --- llvm/trunk/test/Transforms/PGOProfile/PR41279.ll +++ llvm/trunk/test/Transforms/PGOProfile/PR41279.ll @@ -0,0 +1,67 @@ +; Test that instrumentaiton works fine for the case of failing the split critical edges. +; RUN: opt < %s -pgo-instr-gen -S | FileCheck %s --check-prefix=GEN +; RUN: opt < %s -passes=pgo-instr-gen -S | FileCheck %s --check-prefix=GEN + +declare void @f3({ i8*, i64 }*, { i8*, i64 }*, i64) +declare { i8*, i64 } @f0({ i8*, i64 }*) +declare i64 @f1() +declare void @invok2({ i8*, i64 }*, i8* noalias readonly align 1, i64) +declare void @invok1({ i8*, i64 }*, { i8*, i64 }*, i64) +declare i32 @__CxxFrameHandler3(...) + +define internal void @foo({ i8*, i64 }*, { i8*, i64 }*) personality i32 (...)* @__CxxFrameHandler3 { + %3 = alloca i8, align 1 + store i8 0, i8* %3, align 1 + %4 = call i64 @f1() + %5 = icmp ult i64 %4, 32 + br i1 %5, label %7, label %13 + +6: + cleanupret from %17 unwind to caller +; GEN: 6: +; GEN: call void @llvm.instrprof.increment(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @__profn__stdin__foo, i32 0, i32 0), i64 60927483247, i32 4, i32 2) + +7: + store i8 1, i8* %3, align 1 + %8 = call { i8*, i64 } @f0({ i8*, i64 }* %0) + %9 = extractvalue { i8*, i64 } %8, 0 + %10 = extractvalue { i8*, i64 } %8, 1 + invoke void @invok1({ i8*, i64 }* %1, { i8*, i64 }* %0, i64 1) + to label %11 unwind label %16 +; GEN: 7: +; GEN-NOT: call void @llvm.instrprof.increment + +11: + store i8 0, i8* %3, align 1 + invoke void @invok2({ i8*, i64 }* %1, i8* noalias readonly align 1 %9, i64 %10) + to label %12 unwind label %16 +; GEN: 11: +; GEN-NOT: call void @llvm.instrprof.increment + +12: + store i8 0, i8* %3, align 1 + br label %14 +; GEN: 12: +; GEN: call void @llvm.instrprof.increment(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @__profn__stdin__foo, i32 0, i32 0), i64 60927483247, i32 4, i32 1) + +13: + call void @f3({ i8*, i64 }* %0, { i8*, i64 }* %1, i64 1) + br label %14 +; GEN: 13: +; GEN: call void @llvm.instrprof.increment(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @__profn__stdin__foo, i32 0, i32 0), i64 60927483247, i32 4, i32 0) + +14: + ret void + +15: + store i8 0, i8* %3, align 1 + br label %6 +; GEN: 15: +; GEN: call void @llvm.instrprof.increment(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @__profn__stdin__foo, i32 0, i32 0), i64 60927483247, i32 4, i32 3) + +16: + %17 = cleanuppad within none [] + %18 = load i8, i8* %3, align 1 + %19 = trunc i8 %18 to i1 + br i1 %19, label %15, label %6 +}