Index: lldb/trunk/include/lldb/Core/MappedHash.h =================================================================== --- lldb/trunk/include/lldb/Core/MappedHash.h +++ lldb/trunk/include/lldb/Core/MappedHash.h @@ -24,6 +24,7 @@ // Project includes #include "lldb/Utility/DataExtractor.h" #include "lldb/Utility/Stream.h" +#include "llvm/Support/DJB.h" class MappedHash { public: @@ -32,22 +33,10 @@ // by the ELF GNU_HASH sections }; - static uint32_t HashStringUsingDJB(const char *s) { - uint32_t h = 5381; - - for (unsigned char c = *s; c; c = *++s) - h = ((h << 5) + h) + c; - - return h; - } - - static uint32_t HashString(uint32_t hash_function, const char *s) { - if (!s) - return 0; - + static uint32_t HashString(uint32_t hash_function, llvm::StringRef s) { switch (hash_function) { case MappedHash::eHashFunctionDJB: - return HashStringUsingDJB(s); + return llvm::djbHash(s); default: break; @@ -152,164 +141,6 @@ // Write (int fd); }; - template - class ExportTable { - public: - typedef __HeaderDataType HeaderDataType; - typedef Header HeaderType; - typedef __KeyType KeyType; - typedef __ValueType ValueType; - - struct Entry { - uint32_t hash; - KeyType key; - ValueType value; - }; - - typedef std::vector ValueArrayType; - - typedef std::map HashData; - // Map a name hash to one or more name infos - typedef std::map HashToHashData; - - virtual KeyType GetKeyForStringType(const char *cstr) const = 0; - - virtual size_t GetByteSize(const HashData &key_to_key_values) = 0; - - virtual bool WriteHashData(const HashData &hash_data, - lldb_private::Stream &ostrm) = 0; - // - void AddEntry(const char *cstr, const ValueType &value) { - Entry entry; - entry.hash = MappedHash::HashString(eHashFunctionDJB, cstr); - entry.key = GetKeyForStringType(cstr); - entry.value = value; - m_entries.push_back(entry); - } - - void Save(const HeaderDataType &header_data, lldb_private::Stream &ostrm) { - if (m_entries.empty()) - return; - - const uint32_t num_entries = m_entries.size(); - uint32_t i = 0; - - HeaderType header; - - header.magic = HASH_MAGIC; - header.version = 1; - header.hash_function = eHashFunctionDJB; - header.bucket_count = 0; - header.hashes_count = 0; - header.prologue_length = header_data.GetByteSize(); - - // We need to figure out the number of unique hashes first before we can - // calculate the number of buckets we want to use. - typedef std::vector hash_coll; - hash_coll unique_hashes; - unique_hashes.resize(num_entries); - for (i = 0; i < num_entries; ++i) - unique_hashes[i] = m_entries[i].hash; - std::sort(unique_hashes.begin(), unique_hashes.end()); - hash_coll::iterator pos = - std::unique(unique_hashes.begin(), unique_hashes.end()); - const size_t num_unique_hashes = - std::distance(unique_hashes.begin(), pos); - - if (num_unique_hashes > 1024) - header.bucket_count = num_unique_hashes / 4; - else if (num_unique_hashes > 16) - header.bucket_count = num_unique_hashes / 2; - else - header.bucket_count = num_unique_hashes; - if (header.bucket_count == 0) - header.bucket_count = 1; - - std::vector hash_buckets; - std::vector hash_indexes(header.bucket_count, 0); - std::vector hash_values; - std::vector hash_offsets; - hash_buckets.resize(header.bucket_count); - uint32_t bucket_entry_empties = 0; - // StreamString hash_file_data(Stream::eBinary, - // dwarf->GetObjectFile()->GetAddressByteSize(), - // dwarf->GetObjectFile()->GetByteSize()); - - // Push all of the hashes into their buckets and create all bucket - // entries all populated with data. - for (i = 0; i < num_entries; ++i) { - const uint32_t hash = m_entries[i].hash; - const uint32_t bucket_idx = hash % header.bucket_count; - const uint32_t strp_offset = m_entries[i].str_offset; - const uint32_t die_offset = m_entries[i].die_offset; - hash_buckets[bucket_idx][hash][strp_offset].push_back(die_offset); - } - - // Now for each bucket we write the bucket value which is the - // number of hashes and the hash index encoded into a single - // 32 bit unsigned integer. - for (i = 0; i < header.bucket_count; ++i) { - HashToHashData &bucket_entry = hash_buckets[i]; - - if (bucket_entry.empty()) { - // Empty bucket - ++bucket_entry_empties; - hash_indexes[i] = UINT32_MAX; - } else { - const uint32_t hash_value_index = hash_values.size(); - uint32_t hash_count = 0; - typename HashToHashData::const_iterator pos, end = bucket_entry.end(); - for (pos = bucket_entry.begin(); pos != end; ++pos) { - hash_values.push_back(pos->first); - hash_offsets.push_back(GetByteSize(pos->second)); - ++hash_count; - } - - hash_indexes[i] = hash_value_index; - } - } - header.hashes_count = hash_values.size(); - - // Write the header out now that we have the hash_count - header.Write(ostrm); - - // Now for each bucket we write the start index of the hashes - // for the current bucket, or UINT32_MAX if the bucket is empty - for (i = 0; i < header.bucket_count; ++i) { - ostrm.PutHex32(hash_indexes[i]); - } - - // Now we need to write out all of the hash values - for (i = 0; i < header.hashes_count; ++i) { - ostrm.PutHex32(hash_values[i]); - } - - // Now we need to write out all of the hash data offsets, - // there is an offset for each hash in the hashes array - // that was written out above - for (i = 0; i < header.hashes_count; ++i) { - ostrm.PutHex32(hash_offsets[i]); - } - - // Now we write the data for each hash and verify we got the offset - // correct above... - for (i = 0; i < header.bucket_count; ++i) { - HashToHashData &bucket_entry = hash_buckets[i]; - - typename HashToHashData::const_iterator pos, end = bucket_entry.end(); - for (pos = bucket_entry.begin(); pos != end; ++pos) { - if (!bucket_entry.empty()) { - WriteHashData(pos->second); - } - } - } - } - - protected: - typedef std::vector collection; - collection m_entries; - }; - // A class for reading and using a saved hash table from a block of data // in memory template @@ -377,8 +208,8 @@ return result; } - bool Find(const char *name, Pair &pair) const { - if (!name || !name[0]) + bool Find(llvm::StringRef name, Pair &pair) const { + if (name.empty()) return false; if (IsValid()) { @@ -452,7 +283,7 @@ // should be returned. If anything else goes wrong during parsing, // return "eResultError" and the corresponding "Find()" function will // be canceled and return false. - virtual Result GetHashDataForName(const char *name, + virtual Result GetHashDataForName(llvm::StringRef name, lldb::offset_t *hash_data_offset_ptr, Pair &pair) const = 0; Index: lldb/trunk/packages/Python/lldbsuite/test/lang/c/unicode/Makefile =================================================================== --- lldb/trunk/packages/Python/lldbsuite/test/lang/c/unicode/Makefile +++ lldb/trunk/packages/Python/lldbsuite/test/lang/c/unicode/Makefile @@ -0,0 +1,6 @@ +LEVEL = ../../../make + +C_SOURCES := main.c +CFLAGS_EXTRAS += -finput-charset=UTF-8 -fextended-identifiers -std=c99 + +include $(LEVEL)/Makefile.rules Index: lldb/trunk/packages/Python/lldbsuite/test/lang/c/unicode/TestUnicodeSymbols.py =================================================================== --- lldb/trunk/packages/Python/lldbsuite/test/lang/c/unicode/TestUnicodeSymbols.py +++ lldb/trunk/packages/Python/lldbsuite/test/lang/c/unicode/TestUnicodeSymbols.py @@ -0,0 +1,19 @@ +# coding=utf8 +import lldb +from lldbsuite.test.lldbtest import * +import lldbsuite.test.lldbutil as lldbutil + + +class TestUnicodeSymbols(TestBase): + + mydir = TestBase.compute_mydir(__file__) + + def test_union_members(self): + self.build() + spec = lldb.SBModuleSpec() + spec.SetFileSpec(lldb.SBFileSpec(self.getBuildArtifact("a.out"))) + module = lldb.SBModule(spec) + self.assertTrue(module.IsValid()) + mytype = module.FindFirstType("foobár") + self.assertTrue(mytype.IsValid()) + self.assertTrue(mytype.IsPointerType()) Index: lldb/trunk/packages/Python/lldbsuite/test/lang/c/unicode/main.c =================================================================== --- lldb/trunk/packages/Python/lldbsuite/test/lang/c/unicode/main.c +++ lldb/trunk/packages/Python/lldbsuite/test/lang/c/unicode/main.c @@ -0,0 +1,5 @@ +typedef void *foob\u00E1r; +foob\u00E1r X = 0; +int main() { + return (long)X; +} Index: lldb/trunk/source/Plugins/SymbolFile/DWARF/HashedNameToDIE.h =================================================================== --- lldb/trunk/source/Plugins/SymbolFile/DWARF/HashedNameToDIE.h +++ lldb/trunk/source/Plugins/SymbolFile/DWARF/HashedNameToDIE.h @@ -132,17 +132,17 @@ const uint32_t die_offset_end, DIEInfoArray &die_info_array) const; - size_t FindByName(const char *name, DIEArray &die_offsets); + size_t FindByName(llvm::StringRef name, DIEArray &die_offsets); - size_t FindByNameAndTag(const char *name, const dw_tag_t tag, + size_t FindByNameAndTag(llvm::StringRef name, const dw_tag_t tag, DIEArray &die_offsets); - size_t - FindByNameAndTagAndQualifiedNameHash(const char *name, const dw_tag_t tag, - const uint32_t qualified_name_hash, - DIEArray &die_offsets); + size_t FindByNameAndTagAndQualifiedNameHash( + llvm::StringRef name, const dw_tag_t tag, + const uint32_t qualified_name_hash, DIEArray &die_offsets); - size_t FindCompleteObjCClassByName(const char *name, DIEArray &die_offsets, + size_t FindCompleteObjCClassByName(llvm::StringRef name, + DIEArray &die_offsets, bool must_be_implementation); protected: @@ -150,9 +150,9 @@ const lldb_private::RegularExpression ®ex, lldb::offset_t *hash_data_offset_ptr, Pair &pair) const; - size_t FindByName(const char *name, DIEInfoArray &die_info_array); + size_t FindByName(llvm::StringRef name, DIEInfoArray &die_info_array); - Result GetHashDataForName(const char *name, + Result GetHashDataForName(llvm::StringRef name, lldb::offset_t *hash_data_offset_ptr, Pair &pair) const override; Index: lldb/trunk/source/Plugins/SymbolFile/DWARF/HashedNameToDIE.cpp =================================================================== --- lldb/trunk/source/Plugins/SymbolFile/DWARF/HashedNameToDIE.cpp +++ lldb/trunk/source/Plugins/SymbolFile/DWARF/HashedNameToDIE.cpp @@ -379,7 +379,8 @@ DWARFMappedHash::MemoryTable::Result DWARFMappedHash::MemoryTable::GetHashDataForName( - const char *name, lldb::offset_t *hash_data_offset_ptr, Pair &pair) const { + llvm::StringRef name, lldb::offset_t *hash_data_offset_ptr, + Pair &pair) const { pair.key = m_data.GetU32(hash_data_offset_ptr); pair.value.clear(); @@ -406,7 +407,7 @@ // data to parse at least "count" HashData entries. // First make sure the entire C string matches... - const bool match = strcmp(name, strp_cstr) == 0; + const bool match = name == strp_cstr; if (!match && m_header.header_data.HashDataHasFixedByteSize()) { // If the string doesn't match and we have fixed size data, @@ -573,9 +574,9 @@ return die_info_array.size(); } -size_t DWARFMappedHash::MemoryTable::FindByName(const char *name, +size_t DWARFMappedHash::MemoryTable::FindByName(llvm::StringRef name, DIEArray &die_offsets) { - if (!name || !name[0]) + if (name.empty()) return 0; DIEInfoArray die_info_array; @@ -584,7 +585,7 @@ return die_info_array.size(); } -size_t DWARFMappedHash::MemoryTable::FindByNameAndTag(const char *name, +size_t DWARFMappedHash::MemoryTable::FindByNameAndTag(llvm::StringRef name, const dw_tag_t tag, DIEArray &die_offsets) { DIEInfoArray die_info_array; @@ -594,8 +595,8 @@ } size_t DWARFMappedHash::MemoryTable::FindByNameAndTagAndQualifiedNameHash( - const char *name, const dw_tag_t tag, const uint32_t qualified_name_hash, - DIEArray &die_offsets) { + llvm::StringRef name, const dw_tag_t tag, + const uint32_t qualified_name_hash, DIEArray &die_offsets) { DIEInfoArray die_info_array; if (FindByName(name, die_info_array)) DWARFMappedHash::ExtractDIEArray(die_info_array, tag, qualified_name_hash, @@ -604,7 +605,7 @@ } size_t DWARFMappedHash::MemoryTable::FindCompleteObjCClassByName( - const char *name, DIEArray &die_offsets, bool must_be_implementation) { + llvm::StringRef name, DIEArray &die_offsets, bool must_be_implementation) { DIEInfoArray die_info_array; if (FindByName(name, die_info_array)) { if (must_be_implementation && @@ -628,9 +629,9 @@ return die_offsets.size(); } -size_t DWARFMappedHash::MemoryTable::FindByName(const char *name, +size_t DWARFMappedHash::MemoryTable::FindByName(llvm::StringRef name, DIEInfoArray &die_info_array) { - if (!name || !name[0]) + if (name.empty()) return 0; Pair kv_pair; Index: lldb/trunk/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp =================================================================== --- lldb/trunk/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp +++ lldb/trunk/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp @@ -1510,7 +1510,8 @@ method_die_offsets.clear(); if (m_using_apple_tables) { if (m_apple_objc_ap.get()) - m_apple_objc_ap->FindByName(class_name.GetCString(), method_die_offsets); + m_apple_objc_ap->FindByName(class_name.GetStringRef(), + method_die_offsets); } else { if (!m_indexed) Index(); @@ -2183,7 +2184,7 @@ basename)) basename = name_cstr; - m_apple_names_ap->FindByName(basename.data(), die_offsets); + m_apple_names_ap->FindByName(basename, die_offsets); } } else { // Index the DWARF if we haven't already @@ -2489,8 +2490,6 @@ // Remember how many sc_list are in the list before we search in case // we are appending the results to a variable list. - const char *name_cstr = name.GetCString(); - const uint32_t original_size = sc_list.GetSize(); DWARFDebugInfo *info = DebugInfo(); @@ -2511,7 +2510,8 @@ // want to canonicalize this (strip double spaces, etc. For now, we // just add all the // dies that we find by exact match. - num_matches = m_apple_names_ap->FindByName(name_cstr, die_offsets); + num_matches = + m_apple_names_ap->FindByName(name.GetStringRef(), die_offsets); for (uint32_t i = 0; i < num_matches; i++) { const DIERef &die_ref = die_offsets[i]; DWARFDIE die = info->GetDIE(die_ref); @@ -2527,7 +2527,7 @@ GetObjectFile()->GetModule()->ReportErrorIfModifyDetected( "the DWARF debug information has been modified (.apple_names " "accelerator table had bad die 0x%8.8x for '%s')", - die_ref.die_offset, name_cstr); + die_ref.die_offset, name.GetCString()); } } } @@ -2536,7 +2536,8 @@ if (parent_decl_ctx && parent_decl_ctx->IsValid()) return 0; // no selectors in namespaces - num_matches = m_apple_names_ap->FindByName(name_cstr, die_offsets); + num_matches = + m_apple_names_ap->FindByName(name.GetStringRef(), die_offsets); // Now make sure these are actually ObjC methods. In this case we can // simply look up the name, // and if it is an ObjC method name, we're good. @@ -2556,7 +2557,7 @@ GetObjectFile()->GetModule()->ReportError( "the DWARF debug information has been modified (.apple_names " "accelerator table had bad die 0x%8.8x for '%s')", - die_ref.die_offset, name_cstr); + die_ref.die_offset, name.GetCString()); } } die_offsets.clear(); @@ -2572,7 +2573,8 @@ // FIXME: Arrange the logic above so that we don't calculate the base // name twice: - num_matches = m_apple_names_ap->FindByName(name_cstr, die_offsets); + num_matches = + m_apple_names_ap->FindByName(name.GetStringRef(), die_offsets); for (uint32_t i = 0; i < num_matches; i++) { const DIERef &die_ref = die_offsets[i]; @@ -2626,7 +2628,7 @@ GetObjectFile()->GetModule()->ReportErrorIfModifyDetected( "the DWARF debug information has been modified (.apple_names " "accelerator table had bad die 0x%8.8x for '%s')", - die_ref.die_offset, name_cstr); + die_ref.die_offset, name.GetCString()); } } die_offsets.clear(); @@ -2850,8 +2852,7 @@ if (m_using_apple_tables) { if (m_apple_types_ap.get()) { - const char *name_cstr = name.GetCString(); - m_apple_types_ap->FindByName(name_cstr, die_offsets); + m_apple_types_ap->FindByName(name.GetStringRef(), die_offsets); } } else { if (!m_indexed) @@ -2944,8 +2945,7 @@ if (m_using_apple_tables) { if (m_apple_types_ap.get()) { - const char *name_cstr = name.GetCString(); - m_apple_types_ap->FindByName(name_cstr, die_offsets); + m_apple_types_ap->FindByName(name.GetStringRef(), die_offsets); } } else { if (!m_indexed) @@ -3013,8 +3013,7 @@ // get indexed and make their global DIE index list if (m_using_apple_tables) { if (m_apple_namespaces_ap.get()) { - const char *name_cstr = name.GetCString(); - m_apple_namespaces_ap->FindByName(name_cstr, die_offsets); + m_apple_namespaces_ap->FindByName(name.GetStringRef(), die_offsets); } } else { if (!m_indexed) @@ -3207,9 +3206,8 @@ if (m_using_apple_tables) { if (m_apple_types_ap.get()) { - const char *name_cstr = type_name.GetCString(); - m_apple_types_ap->FindCompleteObjCClassByName(name_cstr, die_offsets, - must_be_implementation); + m_apple_types_ap->FindCompleteObjCClassByName( + type_name.GetStringRef(), die_offsets, must_be_implementation); } } else { if (!m_indexed) @@ -3398,21 +3396,21 @@ DWARFMappedHash::eAtomTypeQualNameHash); if (has_tag && has_qualified_name_hash) { const char *qualified_name = dwarf_decl_ctx.GetQualifiedName(); - const uint32_t qualified_name_hash = - MappedHash::HashStringUsingDJB(qualified_name); + const uint32_t qualified_name_hash = llvm::djbHash(qualified_name); if (log) GetObjectFile()->GetModule()->LogMessage( log, "FindByNameAndTagAndQualifiedNameHash()"); m_apple_types_ap->FindByNameAndTagAndQualifiedNameHash( - type_name.GetCString(), tag, qualified_name_hash, die_offsets); + type_name.GetStringRef(), tag, qualified_name_hash, + die_offsets); } else if (has_tag) { if (log) GetObjectFile()->GetModule()->LogMessage(log, "FindByNameAndTag()"); - m_apple_types_ap->FindByNameAndTag(type_name.GetCString(), tag, + m_apple_types_ap->FindByNameAndTag(type_name.GetStringRef(), tag, die_offsets); } else { - m_apple_types_ap->FindByName(type_name.GetCString(), die_offsets); + m_apple_types_ap->FindByName(type_name.GetStringRef(), die_offsets); } } } else { Index: lldb/trunk/source/Target/ObjCLanguageRuntime.cpp =================================================================== --- lldb/trunk/source/Target/ObjCLanguageRuntime.cpp +++ lldb/trunk/source/Target/ObjCLanguageRuntime.cpp @@ -23,6 +23,7 @@ #include "lldb/Utility/Timer.h" #include "llvm/ADT/StringRef.h" +#include "llvm/Support/DJB.h" using namespace lldb; using namespace lldb_private; @@ -45,8 +46,7 @@ if (isa != 0) { m_isa_to_descriptor[isa] = descriptor_sp; // class_name is assumed to be valid - m_hash_to_isa_map.insert( - std::make_pair(MappedHash::HashStringUsingDJB(class_name), isa)); + m_hash_to_isa_map.insert(std::make_pair(llvm::djbHash(class_name), isa)); return true; } return false; @@ -180,8 +180,7 @@ } else { // Name hashes were provided, so use them to efficiently lookup name to // isa/descriptor - const uint32_t name_hash = - MappedHash::HashStringUsingDJB(name.GetCString()); + const uint32_t name_hash = llvm::djbHash(name.GetStringRef()); std::pair range = m_hash_to_isa_map.equal_range(name_hash); for (HashToISAIterator range_pos = range.first; range_pos != range.second;