Index: llvm/lib/Demangle/DLangDemangle.cpp
===================================================================
--- llvm/lib/Demangle/DLangDemangle.cpp
+++ llvm/lib/Demangle/DLangDemangle.cpp
@@ -133,6 +133,41 @@
   /// \see https://dlang.org/spec/abi.html#Number
   const char *decodeNumber(const char *Mangled, unsigned long *Ret);
 
+  /// Extract the back reference position from a given string
+  ///
+  /// \param Mangled string to extract the back reference position
+  /// \param Ret assigned result value
+  ///
+  /// \return the remaining string on success or nullptr on failure
+  ///
+  /// \note a result <= 0 is a failure
+  ///
+  /// \see https://dlang.org/spec/abi.html#back_ref
+  /// \see https://dlang.org/spec/abi.html#NumberBackRef
+  const char *decodeBackrefPos(const char *Mangled, long *Ret);
+
+  /// Extract the symbol pointed by the back reference from a given string
+  ///
+  /// \param Mangled string to extract the back reference position
+  /// \param Ret assigned the referenced position, or nullptr on failure
+  ///
+  /// \return the remaining string on success or nullptr on failure
+  ///
+  /// \see https://dlang.org/spec/abi.html#back_ref
+  const char *decodeBackref(const char *Mangled, const char **Ret);
+
+  /// Extract and demangle backreferenced symbol from a given mangled symbol
+  /// and append it to the output string
+  ///
+  /// \param Decl output buffer to write the demangled name
+  /// \param Mangled mangled symbol to be demangled
+  ///
+  /// \return the remaining string on success or nullptr on failure
+  ///
+  /// \see https://dlang.org/spec/abi.html#back_ref
+  /// \see https://dlang.org/spec/abi.html#IdentifierBackRef
+  const char *parseSymbolBackref(OutputString *Decl, const char *Mangled);
+
   /// Check whether it is a function calling convention
   ///
   /// \param Mangled string to extract the function calling convention
@@ -321,12 +356,114 @@
   }
 }
 
+const char *Demangler::decodeBackrefPos(const char *Mangled, long *Ret) {
+  // Return nullptr if trying to extract something that isn't a letter
+  if (Mangled == nullptr || !std::isalpha(static_cast<unsigned char>(*Mangled)))
+    return nullptr;
+
+  /* Any identifier or non-basic type that has been emitted to the mangled
+     symbol before will not be emitted again, but is referenced by a special
+     sequence encoding the relative position of the original occurrence in the
+     mangled symbol name.
+     Numbers in back references are encoded with base 26 by upper case letters
+     A-Z for higher digits but lower case letters a-z for the last digit.
+        NumberBackRef:
+            [a-z]
+            [A-Z] NumberBackRef
+            ^
+   */
+  unsigned long Val = 0;
+
+  while (std::isalpha(static_cast<unsigned char>(*Mangled))) {
+    // Check for overflow
+    if (Val > (ULONG_MAX - 25) / 26)
+      break;
+
+    Val *= 26;
+
+    if (Mangled[0] >= 'a' && Mangled[0] <= 'z') {
+      Val += Mangled[0] - 'a';
+      if (static_cast<long>(Val) <= 0)
+        break;
+      *Ret = Val;
+      return Mangled + 1;
+    }
+
+    Val += Mangled[0] - 'A';
+    Mangled++;
+  }
+
+  return nullptr;
+}
+
+const char *Demangler::decodeBackref(const char *Mangled, const char **Ret) {
+  *Ret = nullptr;
+
+  if (Mangled == nullptr || *Mangled != 'Q')
+    return nullptr;
+
+  // Position of 'Q'
+  const char *Qpos = Mangled;
+  long RefPos;
+  Mangled++;
+
+  Mangled = decodeBackrefPos(Mangled, &RefPos);
+  if (Mangled == nullptr)
+    return nullptr;
+
+  if (RefPos > Qpos - Str)
+    return nullptr;
+
+  /* Set the position of the back reference.  */
+  *Ret = Qpos - RefPos;
+
+  return Mangled;
+}
+
+const char *Demangler::parseSymbolBackref(OutputString *Decl,
+                                          const char *Mangled) {
+  /* An identifier back reference always points to a digit 0 to 9.
+        IdentifierBackRef:
+            Q NumberBackRef
+            ^
+   */
+  const char *Backref;
+  unsigned long Len;
+
+  // Get position of the back reference
+  Mangled = decodeBackref(Mangled, &Backref);
+  if (Mangled == nullptr)
+    return nullptr;
+
+  // Must point to a simple identifier
+  Backref = decodeNumber(Backref, &Len);
+  if (Backref == nullptr || strlen(Backref) < Len)
+    return nullptr;
+
+  Backref = parseLName(Decl, Backref, Len);
+  if (Backref == nullptr)
+    return nullptr;
+
+  return Mangled;
+}
+
 bool Demangler::isSymbolName(const char *Mangled) {
+  long Ret;
+  const char *Qref = Mangled;
+
   if (std::isdigit(*Mangled))
     return true;
 
-  // TODO: Handle symbol back references and template instances
-  return false;
+  // TODO: Handle template instances
+
+  if (*Mangled != 'Q')
+    return false;
+
+  Mangled = decodeBackrefPos(Mangled + 1, &Ret);
+  if (Mangled == nullptr || Ret > Qref - Str)
+    return false;
+
+  return std::isdigit(static_cast<unsigned char>(Qref[-Ret]));
 }
 
 const char *Demangler::parseMangle(OutputString *Decl, const char *Mangled) {
@@ -407,7 +544,10 @@
   if (Mangled == nullptr || *Mangled == '\0')
     return nullptr;
 
-  // TODO: Parse back references and lengthless template instances
+  if (*Mangled == 'Q')
+    return parseSymbolBackref(Decl, Mangled);
+
+  // TODO: Parse lengthless template instances
 
   const char *Endptr = decodeNumber(Mangled, &Len);
 
Index: llvm/unittests/Demangle/DLangDemangleTest.cpp
===================================================================
--- llvm/unittests/Demangle/DLangDemangleTest.cpp
+++ llvm/unittests/Demangle/DLangDemangleTest.cpp
@@ -38,7 +38,9 @@
       {"_D8demangle1ii", "demangle.i"},
       {"_D8demangle2siOi", "demangle.si"},
       {"_D8demangle1re", "demangle.r"},
-      {"_D8demangle1iinvalidtypeseq", nullptr}
+      {"_D8demangle1iinvalidtypeseq", nullptr},
+      {"_D8demangle3ABCQeQg1ai", "demangle.ABC.ABC.ABC.a"},
+      {"_D8demangle3ABCQeQaaaa1ai", nullptr}
   };
 
   for (ExpectedVal Val : ExpectedArray) {