diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h --- a/llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h +++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h @@ -231,6 +231,7 @@ llvm::Optional BaseAddr; /// The compile unit debug information entry items. std::vector DieArray; + std::vector LastChildren; /// Map from range's start address to end address and corresponding DIE. /// IntervalMap does not support range removal, as a result, we use the diff --git a/llvm/include/llvm/DebugInfo/GSYM/GsymCreator.h b/llvm/include/llvm/DebugInfo/GSYM/GsymCreator.h --- a/llvm/include/llvm/DebugInfo/GSYM/GsymCreator.h +++ b/llvm/include/llvm/DebugInfo/GSYM/GsymCreator.h @@ -133,7 +133,7 @@ /// of FunctionInfo objects, see "llvm/DebugInfo/GSYM/FunctionInfo.h". class GsymCreator { // Private member variables require Mutex protections - mutable std::recursive_mutex Mutex; + mutable std::mutex Mutex; std::vector Funcs; StringTableBuilder StrTab; StringSet<> StringStorage; diff --git a/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp b/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp --- a/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp +++ b/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp @@ -26,6 +26,7 @@ #include #include #include +#include #include #include @@ -37,10 +38,9 @@ DWARFSectionKind SectionKind) { const DWARFObject &D = C.getDWARFObj(); addUnitsImpl(C, D, Section, C.getDebugAbbrev(), &D.getRangesSection(), - &D.getLocSection(), D.getStrSection(), - D.getStrOffsetsSection(), &D.getAddrSection(), - D.getLineSection(), D.isLittleEndian(), false, false, - SectionKind); + &D.getLocSection(), D.getStrSection(), D.getStrOffsetsSection(), + &D.getAddrSection(), D.getLineSection(), D.isLittleEndian(), + false, false, SectionKind); } void DWARFUnitVector::addUnitsForDWOSection(DWARFContext &C, @@ -48,11 +48,11 @@ DWARFSectionKind SectionKind, bool Lazy) { const DWARFObject &D = C.getDWARFObj(); - addUnitsImpl(C, D, DWOSection, C.getDebugAbbrevDWO(), &D.getRangesDWOSection(), - &D.getLocDWOSection(), D.getStrDWOSection(), - D.getStrOffsetsDWOSection(), &D.getAddrSection(), - D.getLineDWOSection(), C.isLittleEndian(), true, Lazy, - SectionKind); + addUnitsImpl(C, D, DWOSection, C.getDebugAbbrevDWO(), + &D.getRangesDWOSection(), &D.getLocDWOSection(), + D.getStrDWOSection(), D.getStrOffsetsDWOSection(), + &D.getAddrSection(), D.getLineDWOSection(), C.isLittleEndian(), + true, Lazy, SectionKind); } void DWARFUnitVector::addUnitsImpl( @@ -86,12 +86,12 @@ std::unique_ptr U; if (Header.isTypeUnit()) U = std::make_unique(Context, InfoSection, Header, DA, - RS, LocSection, SS, SOS, AOS, LS, - LE, IsDWO, *this); + RS, LocSection, SS, SOS, AOS, LS, + LE, IsDWO, *this); else - U = std::make_unique(Context, InfoSection, Header, - DA, RS, LocSection, SS, SOS, - AOS, LS, LE, IsDWO, *this); + U = std::make_unique(Context, InfoSection, Header, DA, + RS, LocSection, SS, SOS, AOS, LS, + LE, IsDWO, *this); return U; }; } @@ -304,17 +304,18 @@ // Parse the rangelist table header, including the optional array of offsets // following it (DWARF v5 and later). -template -static Expected -parseListTableHeader(DWARFDataExtractor &DA, uint64_t Offset, - DwarfFormat Format) { +template +static Expected parseListTableHeader(DWARFDataExtractor &DA, + uint64_t Offset, + DwarfFormat Format) { // We are expected to be called with Offset 0 or pointing just past the table // header. Correct Offset in the latter case so that it points to the start // of the header. if (Offset > 0) { uint64_t HeaderSize = DWARFListTableHeader::getHeaderSize(Format); if (Offset < HeaderSize) - return createStringError(errc::invalid_argument, "did not detect a valid" + return createStringError(errc::invalid_argument, + "did not detect a valid" " list table with base = 0x%" PRIx64 "\n", Offset); Offset -= HeaderSize; @@ -364,17 +365,17 @@ uint32_t Depth = 0; bool IsCUDie = true; - while (DIE.extractFast(*this, &DIEOffset, DebugInfoData, NextCUOffset, - Depth)) { + while ( + DIE.extractFast(*this, &DIEOffset, DebugInfoData, NextCUOffset, Depth)) { if (IsCUDie) { if (AppendCUDie) Dies.push_back(DIE); if (!AppendNonCUDies) break; // The average bytes per DIE entry has been seen to be - // around 14-20 so let's pre-reserve the needed memory for + // around 6-12 so let's pre-reserve the needed memory for // our DIE entries accordingly. - Dies.reserve(Dies.size() + getDebugInfoSize() / 14); + Dies.reserve(Dies.size() + getDebugInfoSize() / 6); IsCUDie = false; } else { Dies.push_back(DIE); @@ -390,10 +391,12 @@ if (Depth > 0) --Depth; if (Depth == 0) - break; // We are done with this compile unit! + break; // We are done with this compile unit! } } + Dies.shrink_to_fit(); + // Give a little bit of info if we encounter corrupt DWARF (our offset // should always terminate at or before the start of the next compilation // unit header). @@ -411,9 +414,48 @@ Context.getRecoverableErrorHandler()(std::move(e)); } +template +auto findLastChildren(const In &InRange, const GetDepthFunc &GetDepth, + const HasChildrenPred &HasChildren, + const IsEndTagPred &IsEndTag) { + using DepthType = decltype(GetDepth(*InRange.begin())); + using IndexType = typename In::size_type; + + std::vector LastChildren(InRange.size(), 0); + + std::stack> ParentIdxs; + for (IndexType I = 0, EndIdx = InRange.size(); I < EndIdx; ++I) { + if (!ParentIdxs.empty()) { + DepthType CurrentDepth = GetDepth(InRange[I]); + + DepthType StackSize = ParentIdxs.size(); + if (CurrentDepth <= StackSize) { + // a child of the current parent + while (CurrentDepth < StackSize--) { + ParentIdxs.pop(); + } + + assert(!LastChildren[ParentIdxs.top()]); + + if (IsEndTag(InRange[I])) { + // the last child of the current parent + LastChildren[ParentIdxs.top()] = I; + } + } + } + + if (HasChildren(InRange[I])) { + assert(GetDepth(InRange[I]) == ParentIdxs.size()); + ParentIdxs.push(I); + } + } + + return LastChildren; +} + Error DWARFUnit::tryExtractDIEsIfNeeded(bool CUDieOnly) { - if ((CUDieOnly && !DieArray.empty()) || - DieArray.size() > 1) + if ((CUDieOnly && !DieArray.empty()) || DieArray.size() > 1) return Error::success(); // Already parsed. bool HasCUDie = !DieArray.empty(); @@ -510,6 +552,13 @@ isLittleEndian, getAddressByteSize())); } + LastChildren = + findLastChildren(DieArray, std::mem_fn(&DWARFDebugInfoEntry::getDepth), + std::mem_fn(&DWARFDebugInfoEntry::hasChildren), + [](const DWARFDebugInfoEntry &DIE) { + return DIE.getTag() == dwarf::DW_TAG_null; + }); + // Don't fall back to DW_AT_GNU_ranges_base: it should be ignored for // skeleton CU DIE, so that DWARF users not aware of it are not broken. return Error::success(); @@ -678,9 +727,8 @@ return R->second.second; } -void -DWARFUnit::getInlinedChainForAddress(uint64_t Address, - SmallVectorImpl &InlinedChain) { +void DWARFUnit::getInlinedChainForAddress( + uint64_t Address, SmallVectorImpl &InlinedChain) { assert(InlinedChain.empty()); // Try to look for subprogram DIEs in the DWO file. parseDWO(); @@ -696,7 +744,7 @@ } if (SubroutineDIE.getTag() == DW_TAG_inlined_subroutine) InlinedChain.push_back(SubroutineDIE); - SubroutineDIE = SubroutineDIE.getParent(); + SubroutineDIE = SubroutineDIE.getParent(); } } @@ -781,15 +829,16 @@ if (!Die->hasChildren()) return DWARFDie(); - uint32_t Depth = Die->getDepth(); - for (size_t I = getDIEIndex(Die) + 1, EndIdx = DieArray.size(); I < EndIdx; - ++I) { - if (DieArray[I].getDepth() == Depth + 1 && - DieArray[I].getTag() == dwarf::DW_TAG_null) - return DWARFDie(this, &DieArray[I]); - assert(DieArray[I].getDepth() > Depth && "Not processing children?"); + // We do not want access out of bounds when parsing corrupted debug data. + size_t I = getDIEIndex(Die); + if (I >= LastChildren.size()) + return DWARFDie(); + size_t LastChild = LastChildren[I]; + if (!LastChild) { + // TODO Shouldn't Die->hasChildren() be false in that case? + return DWARFDie(); } - return DWARFDie(); + return DWARFDie(this, &DieArray[LastChild]); } const DWARFAbbreviationDeclarationSet *DWARFUnit::getAbbreviations() const { @@ -819,7 +868,8 @@ if (ValidationSize >= Size) if (DA.isValidOffsetForDataOfSize((uint32_t)Base, ValidationSize)) return *this; - return createStringError(errc::invalid_argument, "length exceeds section size"); + return createStringError(errc::invalid_argument, + "length exceeds section size"); } // Look for a DWARF64-formatted contribution to the string offsets table @@ -827,10 +877,13 @@ static Expected parseDWARF64StringOffsetsTableHeader(DWARFDataExtractor &DA, uint64_t Offset) { if (!DA.isValidOffsetForDataOfSize(Offset, 16)) - return createStringError(errc::invalid_argument, "section offset exceeds section size"); + return createStringError(errc::invalid_argument, + "section offset exceeds section size"); if (DA.getU32(&Offset) != dwarf::DW_LENGTH_DWARF64) - return createStringError(errc::invalid_argument, "32 bit contribution referenced from a 64 bit unit"); + return createStringError( + errc::invalid_argument, + "32 bit contribution referenced from a 64 bit unit"); uint64_t Size = DA.getU64(&Offset); uint8_t Version = DA.getU16(&Offset); @@ -845,7 +898,8 @@ static Expected parseDWARF32StringOffsetsTableHeader(DWARFDataExtractor &DA, uint64_t Offset) { if (!DA.isValidOffsetForDataOfSize(Offset, 8)) - return createStringError(errc::invalid_argument, "section offset exceeds section size"); + return createStringError(errc::invalid_argument, + "section offset exceeds section size"); uint32_t ContributionSize = DA.getU32(&Offset); if (ContributionSize >= dwarf::DW_LENGTH_lo_reserved) @@ -867,7 +921,8 @@ switch (Format) { case dwarf::DwarfFormat::DWARF64: { if (Offset < 16) - return createStringError(errc::invalid_argument, "insufficient space for 64 bit header prefix"); + return createStringError(errc::invalid_argument, + "insufficient space for 64 bit header prefix"); auto DescOrError = parseDWARF64StringOffsetsTableHeader(DA, Offset - 16); if (!DescOrError) return DescOrError.takeError(); @@ -876,7 +931,8 @@ } case dwarf::DwarfFormat::DWARF32: { if (Offset < 8) - return createStringError(errc::invalid_argument, "insufficient space for 32 bit header prefix"); + return createStringError(errc::invalid_argument, + "insufficient space for 32 bit header prefix"); auto DescOrError = parseDWARF32StringOffsetsTableHeader(DA, Offset - 8); if (!DescOrError) return DescOrError.takeError(); @@ -901,7 +957,7 @@ } Expected> -DWARFUnit::determineStringOffsetsTableContributionDWO(DWARFDataExtractor & DA) { +DWARFUnit::determineStringOffsetsTableContributionDWO(DWARFDataExtractor &DA) { assert(IsDWO); uint64_t Offset = 0; auto IndexEntry = Header.getIndexEntry(); @@ -914,7 +970,8 @@ return None; Offset += Header.getFormat() == dwarf::DwarfFormat::DWARF32 ? 8 : 16; // Look for a valid contribution at the given offset. - auto DescOrError = parseDWARFStringOffsetsTableHeader(DA, Header.getFormat(), Offset); + auto DescOrError = + parseDWARFStringOffsetsTableHeader(DA, Header.getFormat(), Offset); if (!DescOrError) return DescOrError.takeError(); return *DescOrError; diff --git a/llvm/lib/DebugInfo/GSYM/GsymCreator.cpp b/llvm/lib/DebugInfo/GSYM/GsymCreator.cpp --- a/llvm/lib/DebugInfo/GSYM/GsymCreator.cpp +++ b/llvm/lib/DebugInfo/GSYM/GsymCreator.cpp @@ -35,7 +35,7 @@ const uint32_t Base = insertString(filename); FileEntry FE(Dir, Base); - std::lock_guard Guard(Mutex); + std::lock_guard Guard(Mutex); const auto NextIndex = Files.size(); // Find FE in hash map and insert if not present. auto R = FileEntryToIndex.insert(std::make_pair(FE, NextIndex)); @@ -55,7 +55,7 @@ } llvm::Error GsymCreator::encode(FileWriter &O) const { - std::lock_guard Guard(Mutex); + std::lock_guard Guard(Mutex); if (Funcs.empty()) return createStringError(std::errc::invalid_argument, "no functions to encode"); @@ -188,7 +188,7 @@ } llvm::Error GsymCreator::finalize(llvm::raw_ostream &OS) { - std::lock_guard Guard(Mutex); + std::lock_guard Guard(Mutex); if (Finalized) return createStringError(std::errc::invalid_argument, "already finalized"); Finalized = true; @@ -293,7 +293,10 @@ uint32_t GsymCreator::insertString(StringRef S, bool Copy) { if (S.empty()) return 0; - std::lock_guard Guard(Mutex); + + // The hash can be calculated outside the lock. + CachedHashStringRef CHStr(S); + std::lock_guard Guard(Mutex); if (Copy) { // We need to provide backing storage for the string if requested // since StringTableBuilder stores references to strings. Any string @@ -301,22 +304,22 @@ // copied, but any string created by code will need to be copied. // This allows GsymCreator to be really fast when parsing DWARF and // other object files as most strings don't need to be copied. - CachedHashStringRef CHStr(S); if (!StrTab.contains(CHStr)) - S = StringStorage.insert(S).first->getKey(); + CHStr = CachedHashStringRef{StringStorage.insert(S).first->getKey(), + CHStr.hash()}; } - return StrTab.add(S); + return StrTab.add(CHStr); } void GsymCreator::addFunctionInfo(FunctionInfo &&FI) { - std::lock_guard Guard(Mutex); + std::lock_guard Guard(Mutex); Ranges.insert(FI.Range); - Funcs.emplace_back(FI); + Funcs.emplace_back(std::move(FI)); } void GsymCreator::forEachFunctionInfo( std::function const &Callback) { - std::lock_guard Guard(Mutex); + std::lock_guard Guard(Mutex); for (auto &FI : Funcs) { if (!Callback(FI)) break; @@ -325,7 +328,7 @@ void GsymCreator::forEachFunctionInfo( std::function const &Callback) const { - std::lock_guard Guard(Mutex); + std::lock_guard Guard(Mutex); for (const auto &FI : Funcs) { if (!Callback(FI)) break; @@ -333,7 +336,7 @@ } size_t GsymCreator::getNumFunctionInfos() const { - std::lock_guard Guard(Mutex); + std::lock_guard Guard(Mutex); return Funcs.size(); } @@ -344,6 +347,6 @@ } bool GsymCreator::hasFunctionInfoForAddress(uint64_t Addr) const { - std::lock_guard Guard(Mutex); + std::lock_guard Guard(Mutex); return Ranges.contains(Addr); }