Index: llvm/trunk/include/llvm/ProfileData/InstrProf.h =================================================================== --- llvm/trunk/include/llvm/ProfileData/InstrProf.h +++ llvm/trunk/include/llvm/ProfileData/InstrProf.h @@ -425,9 +425,20 @@ // A map from function runtime address to function name MD5 hash. // This map is only populated and used by raw instr profile reader. AddrHashMap AddrToMD5Map; + bool Sorted = false; + + static StringRef getExternalSymbol() { + return "** External Symbol **"; + } + + // If the symtab is created by a series of calls to \c addFuncName, \c + // finalizeSymtab needs to be called before looking up function names. + // This is required because the underlying map is a vector (for space + // efficiency) which needs to be sorted. + inline void finalizeSymtab(); public: - InstrProfSymtab() = default; + InstrProfSymtab() = default; /// Create InstrProfSymtab from an object file section which /// contains function PGO names. When section may contain raw @@ -456,21 +467,17 @@ /// \p IterRange. This interface is used by IndexedProfReader. template Error create(const NameIterRange &IterRange); - // If the symtab is created by a series of calls to \c addFuncName, \c - // finalizeSymtab needs to be called before looking up function names. - // This is required because the underlying map is a vector (for space - // efficiency) which needs to be sorted. - inline void finalizeSymtab(); - /// Update the symtab by adding \p FuncName to the table. This interface /// is used by the raw and text profile readers. Error addFuncName(StringRef FuncName) { if (FuncName.empty()) return make_error(instrprof_error::malformed); auto Ins = NameTab.insert(FuncName); - if (Ins.second) + if (Ins.second) { MD5NameMap.push_back(std::make_pair( IndexedInstrProf::ComputeHash(FuncName), Ins.first->getKey())); + Sorted = false; + } return Error::success(); } @@ -491,6 +498,16 @@ /// If not found, return an empty string. inline StringRef getFuncName(uint64_t FuncMD5Hash); + /// Just like getFuncName, except that it will return a non-empty StringRef + /// if the function is external to this symbol table. All such cases + /// will be represented using the same StringRef value. + inline StringRef getFuncNameOrExternalSymbol(uint64_t FuncMD5Hash); + + /// True if Symbol is the value used to represent external symbols. + static bool isExternalSymbol(const StringRef &Symbol) { + return Symbol == InstrProfSymtab::getExternalSymbol(); + } + /// Return function from the name's md5 hash. Return nullptr if not found. inline Function *getFunction(uint64_t FuncMD5Hash); @@ -524,14 +541,25 @@ } void InstrProfSymtab::finalizeSymtab() { + if (Sorted) + return; std::sort(MD5NameMap.begin(), MD5NameMap.end(), less_first()); std::sort(MD5FuncMap.begin(), MD5FuncMap.end(), less_first()); std::sort(AddrToMD5Map.begin(), AddrToMD5Map.end(), less_first()); AddrToMD5Map.erase(std::unique(AddrToMD5Map.begin(), AddrToMD5Map.end()), AddrToMD5Map.end()); + Sorted = true; +} + +StringRef InstrProfSymtab::getFuncNameOrExternalSymbol(uint64_t FuncMD5Hash) { + StringRef ret = getFuncName(FuncMD5Hash); + if (ret.empty()) + return InstrProfSymtab::getExternalSymbol(); + return ret; } StringRef InstrProfSymtab::getFuncName(uint64_t FuncMD5Hash) { + finalizeSymtab(); auto Result = std::lower_bound(MD5NameMap.begin(), MD5NameMap.end(), FuncMD5Hash, [](const std::pair &LHS, @@ -542,6 +570,7 @@ } Function* InstrProfSymtab::getFunction(uint64_t FuncMD5Hash) { + finalizeSymtab(); auto Result = std::lower_bound(MD5FuncMap.begin(), MD5FuncMap.end(), FuncMD5Hash, [](const std::pair &LHS, Index: llvm/trunk/include/llvm/ProfileData/InstrProfReader.h =================================================================== --- llvm/trunk/include/llvm/ProfileData/InstrProfReader.h +++ llvm/trunk/include/llvm/ProfileData/InstrProfReader.h @@ -101,7 +101,7 @@ return make_error(Err); } - Error error(Error E) { return error(InstrProfError::take(std::move(E))); } + Error error(Error &&E) { return error(InstrProfError::take(std::move(E))); } /// Clear the current error and return a successful one. Error success() { return error(instrprof_error::success); } Index: llvm/trunk/lib/ProfileData/InstrProf.cpp =================================================================== --- llvm/trunk/lib/ProfileData/InstrProf.cpp +++ llvm/trunk/lib/ProfileData/InstrProf.cpp @@ -355,7 +355,7 @@ } } } - + Sorted = false; finalizeSymtab(); return Error::success(); } @@ -461,7 +461,6 @@ while (P < EndP && *P == 0) P++; } - Symtab.finalizeSymtab(); return Error::success(); } Index: llvm/trunk/lib/ProfileData/InstrProfReader.cpp =================================================================== --- llvm/trunk/lib/ProfileData/InstrProfReader.cpp +++ llvm/trunk/lib/ProfileData/InstrProfReader.cpp @@ -200,9 +200,13 @@ std::pair VD = Line->rsplit(':'); uint64_t TakenCount, Value; if (ValueKind == IPVK_IndirectCallTarget) { - if (Error E = Symtab->addFuncName(VD.first)) - return E; - Value = IndexedInstrProf::ComputeHash(VD.first); + if (InstrProfSymtab::isExternalSymbol(VD.first)) { + Value = 0; + } else { + if (Error E = Symtab->addFuncName(VD.first)) + return E; + Value = IndexedInstrProf::ComputeHash(VD.first); + } } else { READ_NUM(VD.first, Value); } @@ -227,14 +231,13 @@ ++Line; // If we hit EOF while looking for a name, we're done. if (Line.is_at_end()) { - Symtab->finalizeSymtab(); return error(instrprof_error::eof); } // Read the function name. Record.Name = *Line++; if (Error E = Symtab->addFuncName(Record.Name)) - return E; + return error(std::move(E)); // Read the function hash. if (Line.is_at_end()) @@ -265,11 +268,8 @@ // Check if value profile data exists and read it if so. if (Error E = readValueProfileData(Record)) - return E; + return error(std::move(E)); - // This is needed to avoid two pass parsing because llvm-profdata - // does dumping while reading. - Symtab->finalizeSymtab(); return success(); } @@ -331,7 +331,6 @@ continue; Symtab.mapAddress(FPtr, I->NameRef); } - Symtab.finalizeSymtab(); return success(); } @@ -449,23 +448,23 @@ if (atEnd()) // At this point, ValueDataStart field points to the next header. if (Error E = readNextHeader(getNextHeaderPos())) - return E; + return error(std::move(E)); // Read name ad set it in Record. if (Error E = readName(Record)) - return E; + return error(std::move(E)); // Read FuncHash and set it in Record. if (Error E = readFuncHash(Record)) - return E; + return error(std::move(E)); // Read raw counts and set Record. if (Error E = readRawCounts(Record)) - return E; + return error(std::move(E)); // Read value data and set Record. if (Error E = readValueProfilingData(Record)) - return E; + return error(std::move(E)); // Iterate. advanceData(); Index: llvm/trunk/lib/ProfileData/InstrProfWriter.cpp =================================================================== --- llvm/trunk/lib/ProfileData/InstrProfWriter.cpp +++ llvm/trunk/lib/ProfileData/InstrProfWriter.cpp @@ -361,7 +361,8 @@ std::unique_ptr VD = Func.getValueForSite(VK, S); for (uint32_t I = 0; I < ND; I++) { if (VK == IPVK_IndirectCallTarget) - OS << Symtab.getFuncName(VD[I].Value) << ":" << VD[I].Count << "\n"; + OS << Symtab.getFuncNameOrExternalSymbol(VD[I].Value) << ":" + << VD[I].Count << "\n"; else OS << VD[I].Value << ":" << VD[I].Count << "\n"; } @@ -379,7 +380,6 @@ if (shouldEncodeData(I.getValue())) if (Error E = Symtab.addFuncName(I.getKey())) return E; - Symtab.finalizeSymtab(); for (const auto &I : FunctionData) if (shouldEncodeData(I.getValue())) Index: llvm/trunk/test/tools/llvm-profdata/invalid-profdata.test =================================================================== --- llvm/trunk/test/tools/llvm-profdata/invalid-profdata.test +++ llvm/trunk/test/tools/llvm-profdata/invalid-profdata.test @@ -0,0 +1,50 @@ +RUN: echo ":ir" > %t.input +RUN: echo "_ZN6Thread5StartEv" >> %t.input +RUN: echo "# Func Hash:" >> %t.input +RUN: echo "288793635542036872" >> %t.input +RUN: echo "# Num Counters:" >> %t.input +RUN: echo "3" >> %t.input +RUN: echo "# Counter Values:" >> %t.input +RUN: echo "0" >> %t.input +RUN: echo "12" >> %t.input +RUN: echo "12" >> %t.input +RUN: echo "# Num Value Kinds:" >> %t.input +RUN: echo "1" >> %t.input +RUN: echo "# ValueKind = IPVK_IndirectCallTarget:" >> %t.input +RUN: echo "0" >> %t.input +RUN: echo "# NumValueSites:" >> %t.input +RUN: echo "2" >> %t.input +RUN: echo "2" >> %t.input +RUN: echo "f1:10" >> %t.input +RUN: echo "f2:0" >> %t.input +RUN: echo "1" >> %t.input +RUN: echo ":10" >> %t.input + +RUN: not llvm-profdata merge %t.input -text -output=/dev/null 2>&1 | FileCheck %s --check-prefix=BROKEN +BROKEN: Malformed instrumentation profile data + +RUN: echo ":ir" > %t.input +RUN: echo "_ZN6Thread5StartEv" >> %t.input +RUN: echo "# Func Hash:" >> %t.input +RUN: echo "288793635542036872" >> %t.input +RUN: echo "# Num Counters:" >> %t.input +RUN: echo "3" >> %t.input +RUN: echo "# Counter Values:" >> %t.input +RUN: echo "0" >> %t.input +RUN: echo "12" >> %t.input +RUN: echo "12" >> %t.input +RUN: echo "# Num Value Kinds:" >> %t.input +RUN: echo "1" >> %t.input +RUN: echo "# ValueKind = IPVK_IndirectCallTarget:" >> %t.input +RUN: echo "0" >> %t.input +RUN: echo "# NumValueSites:" >> %t.input +RUN: echo "2" >> %t.input +RUN: echo "2" >> %t.input +RUN: echo "f1:10" >> %t.input +RUN: echo "f2:0" >> %t.input +RUN: echo "1" >> %t.input +RUN: echo "** External Symbol **:10" >> %t.input + +# RUN: llvm-profdata merge %t.input -text -output=%t.out && cat %t.out | FileCheck %s + +CHECK: ** External Symbol **:10 Index: llvm/trunk/unittests/ProfileData/InstrProfTest.cpp =================================================================== --- llvm/trunk/unittests/ProfileData/InstrProfTest.cpp +++ llvm/trunk/unittests/ProfileData/InstrProfTest.cpp @@ -769,7 +769,6 @@ Symtab.mapAddress(uint64_t(callee3), 0x3000ULL); Symtab.mapAddress(uint64_t(callee4), 0x4000ULL); // Missing mapping for callee5 - Symtab.finalizeSymtab(); VPData->deserializeTo(Record, &Symtab.getAddrHashMap()); @@ -858,8 +857,6 @@ EXPECT_THAT_ERROR(Symtab.addFuncName("blah_1"), Succeeded()); EXPECT_THAT_ERROR(Symtab.addFuncName("blah_2"), Succeeded()); EXPECT_THAT_ERROR(Symtab.addFuncName("blah_3"), Succeeded()); - // Finalize it - Symtab.finalizeSymtab(); // Check again R = Symtab.getFuncName(IndexedInstrProf::ComputeHash("blah_1"));