Index: llvm/lib/Demangle/DLangDemangle.cpp
===================================================================
--- llvm/lib/Demangle/DLangDemangle.cpp
+++ llvm/lib/Demangle/DLangDemangle.cpp
@@ -14,12 +14,343 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Demangle/Demangle.h"
+#include "llvm/Demangle/StringView.h"
 #include "llvm/Demangle/Utility.h"
 
+#include <cctype>
 #include <cstring>
+#include <limits>
 
 using namespace llvm;
 using llvm::itanium_demangle::OutputBuffer;
+using llvm::itanium_demangle::StringView;
+
+namespace {
+
+/// Demangle information structure.
+struct Demangler {
+  /// Initialize the information structure we use to pass around information.
+  ///
+  /// \param Mangled String to demangle.
+  Demangler(const char *Mangled);
+
+  /// Extract and demangle the mangled symbol and append it to the output
+  /// string.
+  ///
+  /// \param Demangled Output buffer to write the demangled name.
+  ///
+  /// \return The remaining string on success or nullptr on failure.
+  ///
+  /// \see https://dlang.org/spec/abi.html#name_mangling .
+  /// \see https://dlang.org/spec/abi.html#MangledName .
+  const char *parseMangle(OutputBuffer *Demangled);
+
+private:
+  /// Extract and demangle a given mangled symbol and append it to the output
+  /// string.
+  ///
+  /// \param Demangled Output buffer to write the demangled name.
+  /// \param Mangled Mangled symbol to be demangled.
+  ///
+  /// \return The remaining string on success or nullptr on failure.
+  ///
+  /// \see https://dlang.org/spec/abi.html#name_mangling .
+  /// \see https://dlang.org/spec/abi.html#MangledName .
+  const char *parseMangle(OutputBuffer *Demangled, const char *Mangled);
+
+  /// Extract the number from a given string.
+  ///
+  /// \param Mangled String to extract the number from.
+  /// \param Ret Assigned result value; left untouched on failure.
+  ///
+  /// \return The remaining string on success or nullptr on failure.
+  ///
+  /// \note A result larger than UINT_MAX is considered a failure.
+  ///
+  /// \see https://dlang.org/spec/abi.html#Number .
+  const char *decodeNumber(const char *Mangled, unsigned long *Ret);
+
+  /// Check whether it is a function calling convention.
+  ///
+  /// \param Mangled String to extract the function calling convention from.
+  ///
+  /// \return true on success, false otherwise.
+  ///
+  /// \see https://dlang.org/spec/abi.html#CallConvention .
+  /// \see https://dlang.org/spec/abi.html#function_calling_conventions .
+  bool isCallConvention(const char *Mangled);
+
+  /// Check whether it is the beginning of a symbol name.
+  ///
+  /// \param Mangled String to extract the symbol name from.
+  ///
+  /// \return true on success, false otherwise.
+  ///
+  /// \see https://dlang.org/spec/abi.html#SymbolName .
+  bool isSymbolName(const char *Mangled);
+
+  /// Extract and demangle an identifier from a given mangled symbol and
+  /// append it to the output string.
+  ///
+  /// \param Demangled Output buffer to write the demangled name.
+  /// \param Mangled Mangled symbol to be demangled.
+  ///
+  /// \return The remaining string on success or nullptr on failure.
+  ///
+  /// \see https://dlang.org/spec/abi.html#SymbolName .
+  const char *parseIdentifier(OutputBuffer *Demangled, const char *Mangled);
+
+  /// Extract and demangle the plain identifier from a given mangled symbol and
+  /// prepend/append it to the output string, with a special treatment for some
+  /// magic compiler generated symbols.
+  ///
+  /// \param Demangled Output buffer to write the demangled name.
+  /// \param Mangled Mangled symbol to be demangled.
+  /// \param Len Length of the mangled symbol name.
+  ///
+  /// \return The remaining string on success or nullptr on failure.
+  ///
+  /// \see https://dlang.org/spec/abi.html#LName .
+  const char *parseLName(OutputBuffer *Demangled, const char *Mangled,
+                         unsigned long Len);
+
+  /// Extract and demangle the qualified symbol from a given mangled symbol
+  /// and append it to the output string.
+  ///
+  /// \param Demangled Output buffer to write the demangled name.
+  /// \param Mangled Mangled symbol to be demangled.
+  ///
+  /// \return The remaining string on success or nullptr on failure.
+  ///
+  /// \see https://dlang.org/spec/abi.html#QualifiedName .
+  const char *parseQualified(OutputBuffer *Demangled, const char *Mangled);
+
+  /// The string we are demangling.
+  const char *Str;
+};
+
+} // namespace
+
+const char *Demangler::decodeNumber(const char *Mangled, unsigned long *Ret) {
+  // Return nullptr if trying to extract something that isn't a digit.
+  if (Mangled == nullptr || !std::isdigit(static_cast<unsigned char>(*Mangled)))
+    return nullptr;
+
+  unsigned long Val = 0;
+
+  do {
+    unsigned long Digit = Mangled[0] - '0';
+
+    // Check for overflow.
+    if (Val > (std::numeric_limits<unsigned int>::max() - Digit) / 10)
+      return nullptr;
+
+    Val = Val * 10 + Digit;
+    ++Mangled;
+  } while (std::isdigit(static_cast<unsigned char>(*Mangled)));
+
+  if (*Mangled == '\0')
+    return nullptr;
+
+  *Ret = Val;
+  return Mangled;
+}
+
+bool Demangler::isCallConvention(const char *Mangled) {
+  switch (*Mangled) {
+  case 'F':
+  case 'U':
+  case 'V':
+  case 'W':
+  case 'R':
+  case 'Y':
+    return true;
+
+  default:
+    return false;
+  }
+}
+
+bool Demangler::isSymbolName(const char *Mangled) {
+  if (std::isdigit(static_cast<unsigned char>(*Mangled)))
+    return true;
+
+  // TODO: Handle symbol back references and template instances.
+  return false;
+}
+
+const char *Demangler::parseMangle(OutputBuffer *Demangled,
+                                   const char *Mangled) {
+  // A D mangled symbol is comprised of both scope and type information.
+  //    MangleName:
+  //        _D QualifiedName Type
+  //        _D QualifiedName Z
+  //        ^
+  // The caller should have guaranteed that the start pointer is at the
+  // above location.
+  // Note that type is never a function type, but only the return type of
+  // a function or the type of a variable.
+  Mangled += 2;
+
+  Mangled = parseQualified(Demangled, Mangled);
+
+  if (Mangled != nullptr) {
+    // Artificial symbols end with 'Z' and have no type.
+    if (*Mangled == 'Z')
+      ++Mangled;
+    else {
+      // TODO: Implement symbols with types.
+      return nullptr;
+    }
+  }
+
+  return Mangled;
+}
+
+const char *Demangler::parseQualified(OutputBuffer *Demangled,
+                                      const char *Mangled) {
+  // Qualified names are identifiers separated by their encoded length.
+  // Nested functions also encode their argument types without specifying
+  // what they return.
+  //    QualifiedName:
+  //        SymbolFunctionName
+  //        SymbolFunctionName QualifiedName
+  //        ^
+  //    SymbolFunctionName:
+  //        SymbolName
+  //        SymbolName TypeFunctionNoReturn
+  //        SymbolName M TypeFunctionNoReturn
+  //        SymbolName M TypeModifiers TypeFunctionNoReturn
+  // The start pointer should be at the above location.
+  size_t N = 0;
+  do {
+    // Skip over anonymous symbols.
+    if (*Mangled == '0') {
+      do
+        ++Mangled;
+      while (*Mangled == '0');
+
+      continue;
+    }
+
+    if (N++)
+      *Demangled << '.';
+
+    Mangled = parseIdentifier(Demangled, Mangled);
+
+    if (Mangled && (*Mangled == 'M' || isCallConvention(Mangled))) {
+      // TODO: Implement type function parsing.
+      return nullptr;
+    }
+  } while (Mangled && isSymbolName(Mangled));
+
+  return Mangled;
+}
+
+const char *Demangler::parseIdentifier(OutputBuffer *Demangled,
+                                       const char *Mangled) {
+  unsigned long Len;
+
+  if (Mangled == nullptr || *Mangled == '\0')
+    return nullptr;
+
+  // TODO: Parse back references and lengthless template instances.
+
+  const char *Endptr = decodeNumber(Mangled, &Len);
+
+  if (Endptr == nullptr || Len == 0)
+    return nullptr;
+
+  if (strlen(Endptr) < Len)
+    return nullptr;
+
+  Mangled = Endptr;
+
+  // TODO: Parse template instances with a length prefix.
+
+  // There can be multiple different declarations in the same function that
+  // have the same mangled name.  To make the mangled names unique, a fake
+  // parent in the form `__Sddd' is added to the symbol.
+  if (Len >= 4 && Mangled[0] == '_' && Mangled[1] == '_' && Mangled[2] == 'S') {
+    const char *NumPtr = Mangled + 3;
+    while (NumPtr < (Mangled + Len) &&
+           std::isdigit(static_cast<unsigned char>(*NumPtr)))
+      ++NumPtr;
+
+    if (Mangled + Len == NumPtr) {
+      // Skip over the fake parent.
+      Mangled += Len;
+      return parseIdentifier(Demangled, Mangled);
+    }
+
+    // Else demangle it as a plain identifier.
+  }
+
+  return parseLName(Demangled, Mangled, Len);
+}
+
+const char *Demangler::parseLName(OutputBuffer *Demangled, const char *Mangled,
+                                  unsigned long Len) {
+  switch (Len) {
+  case 6:
+    if (strncmp(Mangled, "__initZ", Len + 1) == 0) {
+      // The static initializer for a given symbol.
+      Demangled->prepend("initializer for ");
+      Demangled->setCurrentPosition(Demangled->getCurrentPosition() - 1);
+      Mangled += Len;
+      return Mangled;
+    }
+    if (strncmp(Mangled, "__vtblZ", Len + 1) == 0) {
+      // The vtable symbol for a given class.
+      Demangled->prepend("vtable for ");
+      Demangled->setCurrentPosition(Demangled->getCurrentPosition() - 1);
+      Mangled += Len;
+      return Mangled;
+    }
+    break;
+
+  case 7:
+    if (strncmp(Mangled, "__ClassZ", Len + 1) == 0) {
+      // The classinfo symbol for a given class.
+      Demangled->prepend("ClassInfo for ");
+      Demangled->setCurrentPosition(Demangled->getCurrentPosition() - 1);
+      Mangled += Len;
+      return Mangled;
+    }
+    break;
+
+  case 11:
+    if (strncmp(Mangled, "__InterfaceZ", Len + 1) == 0) {
+      // The interface symbol for a given class.
+      Demangled->prepend("Interface for ");
+      Demangled->setCurrentPosition(Demangled->getCurrentPosition() - 1);
+      Mangled += Len;
+      return Mangled;
+    }
+    break;
+
+  case 12:
+    if (strncmp(Mangled, "__ModuleInfoZ", Len + 1) == 0) {
+      // The ModuleInfo symbol for a given module.
+      Demangled->prepend("ModuleInfo for ");
+      Demangled->setCurrentPosition(Demangled->getCurrentPosition() - 1);
+      Mangled += Len;
+      return Mangled;
+    }
+    break;
+  }
+
+  *Demangled << StringView(Mangled, Len);
+  Mangled += Len;
+
+  return Mangled;
+}
+
+Demangler::Demangler(const char *Mangled) : Str(Mangled) {}
+
+const char *Demangler::parseMangle(OutputBuffer *Demangled) {
+  return parseMangle(Demangled, this->Str);
+}
 
 char *llvm::dlangDemangle(const char *MangledName) {
   if (MangledName == nullptr || strncmp(MangledName, "_D", 2) != 0)
@@ -29,8 +360,19 @@
   if (!initializeOutputBuffer(nullptr, nullptr, Demangled, 1024))
     return nullptr;
 
-  if (strcmp(MangledName, "_Dmain") == 0)
+  if (strcmp(MangledName, "_Dmain") == 0) {
     Demangled << "D main";
+  } else {
+
+    Demangler D = Demangler(MangledName);
+    MangledName = D.parseMangle(&Demangled);
+
+    // Check that the entire symbol was successfully demangled.
+    if (MangledName == nullptr || *MangledName != '\0') {
+      std::free(Demangled.getBuffer());
+      return nullptr;
+    }
+  }
 
   // OutputBuffer's internal buffer is not null terminated and therefore we need
   // to add it to comply with C null terminated strings.
@@ -40,6 +382,6 @@
     return Demangled.getBuffer();
   }
 
-  free(Demangled.getBuffer());
+  std::free(Demangled.getBuffer());
   return nullptr;
 }
Index: llvm/unittests/Demangle/DLangDemangleTest.cpp
===================================================================
--- llvm/unittests/Demangle/DLangDemangleTest.cpp
+++ llvm/unittests/Demangle/DLangDemangleTest.cpp
@@ -26,8 +26,22 @@
   EXPECT_STREQ(Demangled, GetParam().second);
 }
 
-INSTANTIATE_TEST_SUITE_P(DLangDemangleTest, DLangDemangleTestFixture,
-                         testing::Values(std::make_pair("_Dmain", "D main"),
-                                         std::make_pair(nullptr, nullptr),
-                                         std::make_pair("_Z", nullptr),
-                                         std::make_pair("_DDD", nullptr)));
+INSTANTIATE_TEST_SUITE_P(
+    DLangDemangleTest, DLangDemangleTestFixture,
+    testing::Values(
+        std::make_pair("_Dmain", "D main"), std::make_pair(nullptr, nullptr),
+        std::make_pair("_Z", nullptr), std::make_pair("_DDD", nullptr),
+        std::make_pair("_D88", nullptr),
+        std::make_pair("_D8demangleZ", "demangle"),
+        std::make_pair("_D8demangle4testZ", "demangle.test"),
+        std::make_pair("_D8demangle9anonymous0Z", "demangle.anonymous"),
+        std::make_pair("_D8demangle9anonymous03fooZ", "demangle.anonymous.foo"),
+        std::make_pair("_D8demangle4test6__initZ",
+                       "initializer for demangle.test"),
+        std::make_pair("_D8demangle4test6__vtblZ", "vtable for demangle.test"),
+        std::make_pair("_D8demangle4test7__ClassZ",
+                       "ClassInfo for demangle.test"),
+        std::make_pair("_D8demangle4test11__InterfaceZ",
+                       "Interface for demangle.test"),
+        std::make_pair("_D8demangle4test12__ModuleInfoZ",
+                       "ModuleInfo for demangle.test")));