diff --git a/llvm/lib/Demangle/DLangDemangle.cpp b/llvm/lib/Demangle/DLangDemangle.cpp
--- a/llvm/lib/Demangle/DLangDemangle.cpp
+++ b/llvm/lib/Demangle/DLangDemangle.cpp
@@ -91,6 +91,108 @@
   void need(size_t Size);
 };
 
+/// Demangle information structure
+struct Demangler {
+  /// Initialize the information structure we use to pass around information
+  ///
+  /// \param Mangled String to demangle
+  explicit Demangler(const char *Mangled);
+
+  /// Extract and demangle the mangled symbol and append it to the output string
+  ///
+  /// \param Decl output buffer to write the demangled name
+  ///
+  /// \return the remaining string on success or nullptr on failure
+  ///
+  /// \see https://dlang.org/spec/abi.html#name_mangling
+  /// \see https://dlang.org/spec/abi.html#MangledName
+  const char *parseMangle(OutputString *Decl);
+
+private:
+  /// Extract and demangle a given mangled symbol and append it to the output
+  /// string
+  ///
+  /// \param Decl output buffer to write the demangled name
+  /// \param Mangled mangled symbol to be demangled
+  ///
+  /// \return the remaining string on success or nullptr on failure
+  ///
+  /// \see https://dlang.org/spec/abi.html#name_mangling
+  /// \see https://dlang.org/spec/abi.html#MangledName
+  const char *parseMangle(OutputString *Decl, const char *Mangled);
+
+  /// Extract the number from a given string
+  ///
+  /// \param Mangled string to extract the number
+  /// \param Ret assigned result value; left untouched on failure
+  ///
+  /// \return the remaining string on success or nullptr on failure
+  ///
+  /// \note A result larger than UINT_MAX is considered a failure
+  /// \note A number that runs up to the end of the string is also a failure
+  ///
+  /// \see https://dlang.org/spec/abi.html#Number
+  const char *decodeNumber(const char *Mangled, unsigned long *Ret);
+
+  /// Check whether it is a function calling convention
+  ///
+  /// \param Mangled string to extract the function calling convention
+  ///
+  /// \return true on success, false otherwise
+  ///
+  /// \see https://dlang.org/spec/abi.html#CallConvention
+  /// \see https://dlang.org/spec/abi.html#function_calling_conventions
+  bool isCallConvention(const char *Mangled);
+
+  /// Check whether it is the beginning of a symbol name
+  ///
+  /// \param Mangled string to extract the symbol name
+  ///
+  /// \return true on success, false otherwise
+  ///
+  /// \see https://dlang.org/spec/abi.html#SymbolName
+  bool isSymbolName(const char *Mangled);
+
+  /// Extract and demangle an identifier from a given mangled symbol and append
+  /// it to the output string
+  ///
+  /// \param Decl output buffer to write the demangled name
+  /// \param Mangled mangled symbol to be demangled
+  ///
+  /// \return the remaining string on success or nullptr on failure
+  ///
+  /// \see https://dlang.org/spec/abi.html#SymbolName
+  const char *parseIdentifier(OutputString *Decl, const char *Mangled);
+
+  /// Extract and demangle the plain identifier from a given mangled symbol and
+  /// prepend/append it to the output string, with a special treatment for some
+  /// magic compiler generated symbols.
+  ///
+  /// \param Decl output buffer to write the demangled name
+  /// \param Mangled mangled symbol to be demangled
+  /// \param Len length of the mangled symbol name
+  ///
+  /// \return the remaining string on success or nullptr on failure
+  ///
+  /// \see https://dlang.org/spec/abi.html#LName
+  const char *parseLName(OutputString *Decl, const char *Mangled,
+                         unsigned long Len);
+
+  /// Extract and demangle the qualified symbol from a given mangled symbol
+  /// and append it to the output string
+  ///
+  /// \param Decl output buffer to write the demangled name
+  /// \param Mangled mangled symbol to be demangled
+  ///
+  /// \return the remaining string on success or nullptr on failure
+  ///
+  /// \see https://dlang.org/spec/abi.html#QualifiedName
+  const char *parseQualified(OutputString *Decl, const char *Mangled);
+
+  /// The string we are demangling.
+  const char *Str;
+};
+
 } // namespace
 
 void OutputString::need(size_t Size) {
@@ -169,6 +271,254 @@
   }
 }
 
+/// Check whether \p C is an ASCII decimal digit.
+///
+/// std::isdigit has undefined behavior when handed a negative value other than
+/// EOF, which a plain char holding a non-ASCII byte may be, so the range is
+/// compared directly instead.
+static bool isDecimalDigit(char C) { return C >= '0' && C <= '9'; }
+
+const char *Demangler::decodeNumber(const char *Mangled, unsigned long *Ret) {
+  // Return nullptr if trying to extract something that isn't a digit
+  if (Mangled == nullptr || !isDecimalDigit(*Mangled))
+    return nullptr;
+
+  unsigned long Val = 0;
+
+  while (isDecimalDigit(*Mangled)) {
+    unsigned long Digit = Mangled[0] - '0';
+
+    // Check for overflow: Val * 10 + Digit must not exceed UINT_MAX.
+    if (Val > (UINT_MAX - Digit) / 10)
+      return nullptr;
+
+    Val = Val * 10 + Digit;
+    Mangled++;
+  }
+
+  // A number must be followed by more input; one ending the string is invalid.
+  if (*Mangled == '\0')
+    return nullptr;
+
+  *Ret = Val;
+  return Mangled;
+}
+
+bool Demangler::isCallConvention(const char *Mangled) {
+  switch (*Mangled) {
+  case 'F':
+  case 'U':
+  case 'V':
+  case 'W':
+  case 'R':
+  case 'Y':
+    return true;
+
+  default:
+    return false;
+  }
+}
+
+bool Demangler::isSymbolName(const char *Mangled) {
+  if (isDecimalDigit(*Mangled))
+    return true;
+
+  // TODO: Handle symbol back references and template instances
+  return false;
+}
+
+const char *Demangler::parseMangle(OutputString *Decl, const char *Mangled) {
+  /* A D mangled symbol is comprised of both scope and type information.
+        MangleName:
+            _D QualifiedName Type
+            _D QualifiedName Z
+            ^
+     The caller should have guaranteed that the start pointer is at the
+     above location.
+     Note that type is never a function type, but only the return type of
+     a function or the type of a variable.
+  */
+  Mangled += 2;
+
+  Mangled = parseQualified(Decl, Mangled);
+
+  if (Mangled != nullptr) {
+    // Artificial symbols end with 'Z' and have no type.
+    if (*Mangled == 'Z')
+      Mangled++;
+    else {
+      // TODO: Implement symbols with types
+      return nullptr;
+    }
+  }
+
+  return Mangled;
+}
+
+const char *Demangler::parseQualified(OutputString *Decl, const char *Mangled) {
+  /* Qualified names are identifiers separated by their encoded length.
+     Nested functions also encode their argument types without specifying
+     what they return.
+        QualifiedName:
+            SymbolFunctionName
+            SymbolFunctionName QualifiedName
+            ^
+        SymbolFunctionName:
+            SymbolName
+            SymbolName TypeFunctionNoReturn
+            SymbolName M TypeFunctionNoReturn
+            SymbolName M TypeModifiers TypeFunctionNoReturn
+     The start pointer should be at the above location.
+  */
+  size_t N = 0;
+  do {
+    // Skip over anonymous symbols.
+    if (*Mangled == '0') {
+      do
+        Mangled++;
+      while (*Mangled == '0');
+
+      // Re-evaluate the loop condition on whatever follows the zeros.
+      continue;
+    }
+
+    if (N++)
+      Decl->append('.');
+
+    Mangled = parseIdentifier(Decl, Mangled);
+
+    if (Mangled && (*Mangled == 'M' || isCallConvention(Mangled))) {
+      // TODO: Implement type function parsing
+      return nullptr;
+    }
+  } while (Mangled && isSymbolName(Mangled));
+
+  return Mangled;
+}
+
+const char *Demangler::parseIdentifier(OutputString *Decl,
+                                       const char *Mangled) {
+  unsigned long Len = 0;
+
+  if (Mangled == nullptr || *Mangled == '\0')
+    return nullptr;
+
+  // TODO: Parse back references and lengthless template instances
+
+  const char *Endptr = decodeNumber(Mangled, &Len);
+
+  if (Endptr == nullptr || Len == 0)
+    return nullptr;
+
+  if (strlen(Endptr) < Len)
+    return nullptr;
+
+  Mangled = Endptr;
+
+  // TODO: Parse template instances with a length prefix
+
+  /* There can be multiple different declarations in the same function that have
+     the same mangled name. To make the mangled names unique, a fake parent in
+     the form `__Sddd' is added to the symbol. */
+  if (Len >= 4 && Mangled[0] == '_' && Mangled[1] == '_' && Mangled[2] == 'S') {
+    const char *NumPtr = Mangled + 3;
+    while (NumPtr < (Mangled + Len) && isDecimalDigit(*NumPtr))
+      NumPtr++;
+
+    if (Mangled + Len == NumPtr) {
+      // Skip over the fake parent
+      Mangled += Len;
+      return parseIdentifier(Decl, Mangled);
+    }
+
+    // else demangle it as a plain identifier
+  }
+
+  return parseLName(Decl, Mangled, Len);
+}
+
+const char *Demangler::parseLName(OutputString *Decl, const char *Mangled,
+                                  unsigned long Len) {
+  switch (Len) {
+  case 6:
+    if (strncmp(Mangled, "__ctor", Len) == 0) {
+      /* Constructor symbol for a class/struct. */
+      Decl->append("this");
+      Mangled += Len;
+      return Mangled;
+    } else if (strncmp(Mangled, "__dtor", Len) == 0) {
+      /* Destructor symbol for a class/struct. */
+      Decl->append("~this");
+      Mangled += Len;
+      return Mangled;
+    } else if (strncmp(Mangled, "__initZ", Len + 1) == 0) {
+      /* The static initialiser for a given symbol. */
+      Decl->prepend("initializer for ");
+      Decl->setLength(Decl->getLength() - 1);
+      Mangled += Len;
+      return Mangled;
+    } else if (strncmp(Mangled, "__vtblZ", Len + 1) == 0) {
+      /* The vtable symbol for a given class. */
+      Decl->prepend("vtable for ");
+      Decl->setLength(Decl->getLength() - 1);
+      Mangled += Len;
+      return Mangled;
+    }
+    break;
+
+  case 7:
+    if (strncmp(Mangled, "__ClassZ", Len + 1) == 0) {
+      /* The classinfo symbol for a given class. */
+      Decl->prepend("ClassInfo for ");
+      Decl->setLength(Decl->getLength() - 1);
+      Mangled += Len;
+      return Mangled;
+    }
+    break;
+
+  case 10:
+    if (strncmp(Mangled, "__postblitMFZ", Len + 3) == 0) {
+      /* Postblit symbol for a struct. */
+      Decl->append("this(this)");
+      Mangled += Len + 3;
+      return Mangled;
+    }
+    break;
+
+  case 11:
+    if (strncmp(Mangled, "__InterfaceZ", Len + 1) == 0) {
+      /* The interface symbol for a given class. */
+      Decl->prepend("Interface for ");
+      Decl->setLength(Decl->getLength() - 1);
+      Mangled += Len;
+      return Mangled;
+    }
+    break;
+
+  case 12:
+    if (strncmp(Mangled, "__ModuleInfoZ", Len + 1) == 0) {
+      /* The ModuleInfo symbol for a given module. */
+      Decl->prepend("ModuleInfo for ");
+      Decl->setLength(Decl->getLength() - 1);
+      Mangled += Len;
+      return Mangled;
+    }
+    break;
+  }
+
+  Decl->append(Mangled, Len);
+  Mangled += Len;
+
+  return Mangled;
+}
+
+Demangler::Demangler(const char *Mangled)
+    : Str(Mangled) {}
+
+const char *Demangler::parseMangle(OutputString *Decl) {
+  return parseMangle(Decl, this->Str);
+}
+
 char *llvm::dlangDemangle(const char *MangledName) {
   OutputString Decl;
   char *Demangled = nullptr;
@@ -183,6 +533,14 @@
 
   if (strcmp(MangledName, "_Dmain") == 0) {
     Decl.append("D main");
+  } else {
+
+    Demangler D = Demangler(MangledName);
+    MangledName = D.parseMangle(&Decl);
+
+    /* Check that the entire symbol was successfully demangled. */
+    if (MangledName == nullptr || *MangledName != '\0')
+      Decl.free();
   }
 
   if (Decl.getLength() > 0) {
diff --git a/llvm/unittests/Demangle/DLangDemangleTest.cpp b/llvm/unittests/Demangle/DLangDemangleTest.cpp
--- a/llvm/unittests/Demangle/DLangDemangleTest.cpp
+++ b/llvm/unittests/Demangle/DLangDemangleTest.cpp
@@ -23,7 +23,18 @@
   };
 
   ExpectedVal ExpectedArray[] = {
-      {"_Dmain", "D main"}
+      {"_Dmain", "D main"},
+      {"_DDD", nullptr},
+      {"_D88", nullptr},
+      {"_D8demangleZ", "demangle"},
+      {"_D8demangle4testZ", "demangle.test"},
+      {"_D8demangle9anonymous0Z", "demangle.anonymous"},
+      {"_D8demangle9anonymous03fooZ", "demangle.anonymous.foo"},
+      {"_D8demangle4test6__initZ", "initializer for demangle.test"},
+      {"_D8demangle4test6__vtblZ", "vtable for demangle.test"},
+      {"_D8demangle4test7__ClassZ", "ClassInfo for demangle.test"},
+      {"_D8demangle4test11__InterfaceZ", "Interface for demangle.test"},
+      {"_D8demangle4test12__ModuleInfoZ", "ModuleInfo for demangle.test"}
   };
 
   for (ExpectedVal Val : ExpectedArray) {