diff --git a/llvm/include/llvm/Demangle/RustDemangle.h b/llvm/include/llvm/Demangle/RustDemangle.h --- a/llvm/include/llvm/Demangle/RustDemangle.h +++ b/llvm/include/llvm/Demangle/RustDemangle.h @@ -12,6 +12,7 @@ #include "llvm/Demangle/DemangleConfig.h" #include "llvm/Demangle/StringView.h" #include "llvm/Demangle/Utility.h" +#include namespace llvm { namespace rust_demangle { @@ -37,6 +38,10 @@ // Position in the input string. size_t Position; + // When true, print methods append the output to the stream. + // When false, the output is suppressed. + bool Print; + // True if an error occurred. bool Error; @@ -49,20 +54,61 @@ bool demangle(StringView MangledName); private: - void demanglePath(); + void demanglePath(bool InType); + void demangleImplPath(bool InType); + void demangleGenericArg(); + void demangleType(); + + template void demangleBackRef(Callable Demangler) { + uint64_t BackRef = parseBase62Number(); + if (Error || BackRef >= Position) { + Error = true; + return; + } + + if (!Print) + return; + + const size_t SavedPosition = Position; + Position = BackRef; + Demangler(); + Position = SavedPosition; + } Identifier parseIdentifier(); - void parseOptionalBase62Number(char Tag); + uint64_t parseOptionalBase62Number(char Tag); uint64_t parseBase62Number(); uint64_t parseDecimalNumber(); + template + void withPrint(const bool NewPrint, Callable Demangler) { + const auto SavedPrint = Print; + Print = NewPrint; + Demangler(); + Print = SavedPrint; + } + + void print(char C) { + if (Error || !Print) + return; + + Output += C; + } + void print(StringView S) { - if (Error) + if (Error || !Print) return; Output += S; } + void printDecimalNumber(uint64_t N) { + if (Error || !Print) + return; + + Output << N; + } + char look() const { if (Error || Position >= Input.size()) return 0; diff --git a/llvm/lib/Demangle/RustDemangle.cpp b/llvm/lib/Demangle/RustDemangle.cpp --- a/llvm/lib/Demangle/RustDemangle.cpp +++ b/llvm/lib/Demangle/RustDemangle.cpp @@ -89,12 +89,30 @@ return isDigit(C) || isLower(C) || isUpper(C) || C == '_'; } +/// Returns true if C starts a . +static inline bool isPath(const char C) { + switch (C) { + case 'C': + case 'M': + case 'X': + case 'Y': + case 'N': + case 'I': + case 'B': + return true; + default: + return false; + } +} + // Demangles Rust v0 mangled symbol. Returns true when successful, and false // otherwise. The demangled symbol is stored in Output field. It is // responsibility of the caller to free the memory behind the output stream. // // = "_R" [] +// = bool Demangler::demangle(StringView Mangled) { + Print = true; Position = 0; Error = false; RecursionLevel = 0; @@ -105,9 +123,10 @@ } Input = Mangled; - demanglePath(); + demanglePath(false /* InType */); - // FIXME parse optional . + if (isPath(look())) + withPrint(false, [&] { demanglePath(false /* InType */); }); if (Position != Input.size()) Error = true; @@ -115,6 +134,8 @@ return !Error; } +// Demangles a path. InType indicates that we are demangling a type path. +// // = "C" // crate root // | "M" // (inherent impl) // | "X" // (trait impl) @@ -127,7 +148,7 @@ // | "S" // shim // | // other special namespaces // | // internal namespaces -void Demangler::demanglePath() { +void Demangler::demanglePath(bool InType) { if (Error || RecursionLevel >= MaxRecursionLevel) { Error = true; return; @@ -141,28 +162,84 @@ print(Ident.Name); break; } + case 'M': { + withPrint(false, [&] { demangleImplPath(InType); }); + print('<'); + demangleType(); + print('>'); + break; + } + case 'X': { + withPrint(false, [&] { demangleImplPath(InType); }); + print('<'); + demangleType(); + print(" as "); + demanglePath(true /* InType */); + print('>'); + break; + } + case 'Y': { + print('<'); + demangleType(); + print(" as "); + demanglePath(true /* InType */); + print('>'); + break; + } case 'N': { char NS = consume(); if (!isLower(NS) && !isUpper(NS)) { Error = true; break; } - demanglePath(); + demanglePath(InType); - parseOptionalBase62Number('s'); + uint64_t Disambiguator = parseOptionalBase62Number('s'); Identifier Ident = parseIdentifier(); - if (!Ident.empty()) { - // FIXME print special namespaces: - // * "C" closures - // * "S" shim + if (isUpper(NS)) { + // Special namespaces + print("::{"); + if (NS == 'C') + print("closure"); + else if (NS == 'S') + print("shim"); + else + print(NS); + if (!Ident.empty()) { + print(":"); + print(Ident.Name); + } + print('#'); + printDecimalNumber(Disambiguator); + print('}'); + } else { + // Implementation internal namespaces. + if (!Ident.empty()) { + print("::"); + print(Ident.Name); + } + } + break; + } + case 'I': { + demanglePath(InType); + if (!InType) print("::"); - print(Ident.Name); + print('<'); + for (size_t I = 0; !Error && !consumeIf('E'); ++I) { + if (I >= 1) + print(", "); + demangleGenericArg(); } + print('>'); + break; + } + case 'B': { + demangleBackRef([&] { demanglePath(InType); }); break; } default: - // FIXME parse remaining productions. Error = true; break; } @@ -170,6 +247,120 @@ RecursionLevel -= 1; } +// = [] +// = "s" +void Demangler::demangleImplPath(bool InType) { + parseOptionalBase62Number('s'); + demanglePath(InType); +} + +// = +// | +// | "K" +// = "L" +void Demangler::demangleGenericArg() { + if (consumeIf('L')) + Error = true; // FIXME demangle lifetime + else if (consumeIf('K')) + Error = true; // FIXME demangle const + else + demangleType(); +} + +static const char *parseBasicType(char C) { + switch (C) { + case 'a': + return "i8"; + case 'b': + return "bool"; + case 'c': + return "char"; + case 'd': + return "f64"; + case 'e': + return "str"; + case 'f': + return "f32"; + case 'h': + return "u8"; + case 'i': + return "isize"; + case 'j': + return "usize"; + case 'l': + return "i32"; + case 'm': + return "u32"; + case 'n': + return "i128"; + case 'o': + return "u128"; + case 's': + return "i16"; + case 't': + return "u16"; + case 'u': + return "()"; + case 'v': + return "..."; + case 'x': + return "i64"; + case 'y': + return "u64"; + case 'z': + return "!"; + case 'p': + return "_"; + default: + return nullptr; + } +} + +// = | +// | // named type +// | "A" // [T; N] +// | "S" // [T] +// | "T" {} "E" // (T1, T2, T3, ...) +// | "R" [] // &T +// | "Q" [] // &mut T +// | "P" // *const T +// | "O" // *mut T +// | "F" // fn(...) -> ... +// | "D" // dyn Trait + Send + 'a +// | // backref +void Demangler::demangleType() { + if (Error || RecursionLevel >= MaxRecursionLevel) { + Error = true; + return; + } + RecursionLevel += 1; + + char C = consume(); + if (const char *BasicType = parseBasicType(C)) { + print(BasicType); + } else { + switch (C) { + case 'C': + case 'M': + case 'X': + case 'N': + case 'I': + Position -= 1; + demanglePath(true /* InType */); + break; + case 'B': + demangleBackRef([&] { demangleType(); }); + break; + default: + // FIXME demangle remaining productions. + Error = true; + break; + } + } + + RecursionLevel -= 1; +} + // = ["u"] ["_"] Identifier Demangler::parseIdentifier() { bool Punycode = consumeIf('u'); @@ -195,11 +386,16 @@ } // Parses optional base 62 number. The presence of a number is determined using -// Tag. -void Demangler::parseOptionalBase62Number(char Tag) { - // Parsing result is currently unused. - if (consumeIf(Tag)) - parseBase62Number(); +// Tag. Returns 0 when tag is absent and parsed value + 1 otherwise. +uint64_t Demangler::parseOptionalBase62Number(char Tag) { + if (!consumeIf(Tag)) + return 0; + + uint64_t N = parseBase62Number(); + if (Error || !addAssign(N, 1)) + return 0; + + return N; } // Parses base 62 number with <0-9a-zA-Z> as digits. Number is terminated by diff --git a/llvm/test/Demangle/rust.test b/llvm/test/Demangle/rust.test --- a/llvm/test/Demangle/rust.test +++ b/llvm/test/Demangle/rust.test @@ -9,6 +9,130 @@ CHECK: a::b::c _RNvNvC1a1b1c +; Closure namespace + +CHECK: ns::f::{closure#0} + _RNCNvCs21hi0yVfW1J_2ns1f0B3_ + +CHECK: ns::f::{closure#1} + _RNCNvCs21hi0yVfW1J_2ns1fs_0B3_ + +CHECK: ns::f::{closure#1}::{closure#0} + _RNCNCNvCs21hi0yVfW1J_2ns1fs_00B5_ + +CHECK: ns::f::{closure#1}::g + _RNvNCNvCs21hi0yVfW1J_2ns1fs_01g + +; Shim namespace + +CHECK: >::call_once::{shim:vtable#0} + _RNSNvYNCNvCs21hi0yVfW1J_2ns4shim0INtC4core6FnOnceuE9call_once6vtable + +; Unrecognized special namespace + +CHECK: a::{B:c#10} + _RNBC1as8_1c + +; Inherent impl + +CHECK: ::new + _RNvMCshGpAVYOtgW1_1pNtB2_8SmallStr3new + +CHECK: ::len + _RNvMs_CshGpAVYOtgW1_1pNtB4_8SmallStr3len + +; Trait definition + +CHECK: ::max + _RNvYjNtNtCskrsM4FCwAVA_4core3cmp3Ord3maxCshGpAVYOtgW1_1p + +CHECK: ::clone_from + _RNvYNtCshGpAVYOtgW1_1p8SmallStrNtNtCskrsM4FCwAVA_4core5clone5Clone10clone_fromB4_ + +; Trait impl + +CHECK: ::default + _RNvXs1_CshGpAVYOtgW1_1pNtB5_8SmallStrNtNtCskrsM4FCwAVA_4core7default7Default7defaultB5_ + +CHECK: >::from + _RNvXs2_CshGpAVYOtgW1_1pNtB5_8SmallStrINtNtCskrsM4FCwAVA_4core7convert4FromNtNtCs2beRm3ZGvIF_5alloc6string6StringE4from + +; Generic arguments + +CHECK: >::f::g + _RNvNvMCshGpAVYOtgW1_1pINtB4_1SpppE1f1g + +CHECK: >::insert + _RNvMCshGpAVYOtgW1_1aINtB2_3MapmcE6insertB2_ + +CHECK: >::len + _RNvMs_NtCs2beRm3ZGvIF_5alloc3vecINtB4_3VechE3lenCshGpAVYOtgW1_1a + +; Basic types + +CHECK: b::basic:: + _RINvC1b5basicaE + +CHECK: b::basic:: + _RINvC1b5basicbE + +CHECK: b::basic:: + _RINvC1b5basiccE + +CHECK: b::basic:: + _RINvC1b5basicdE + +CHECK: b::basic:: + _RINvC1b5basiceE + +CHECK: b::basic:: + _RINvC1b5basicfE + +CHECK: b::basic:: + _RINvC1b5basichE + +CHECK: b::basic:: + _RINvC1b5basiciE + +CHECK: b::basic:: + _RINvC1b5basicjE + +CHECK: b::basic:: + _RINvC1b5basiclE + +CHECK: b::basic:: + _RINvC1b5basicmE + +CHECK: b::basic:: + _RINvC1b5basicnE + +CHECK: b::basic:: + _RINvC1b5basicoE + +CHECK: b::basic:: + _RINvC1b5basicsE + +CHECK: b::basic:: + _RINvC1b5basictE + +CHECK: b::basic::<()> + _RINvC1b5basicuE + +CHECK: b::basic::<...> + _RINvC1b5basicvE + +CHECK: b::basic:: + _RINvC1b5basicxE + +CHECK: b::basic:: + _RINvC1b5basicyE + +CHECK: b::basic:: + _RINvC1b5basiczE + +CHECK: b::basic::<_> + _RINvC1b5basicpE + ; Invalid mangled characters CHECK: _RNvC2a.1c @@ -17,6 +141,14 @@ CHECK: _RNvC2a$1c _RNvC2a$1c +CHECK: _RNvCs._1a1b + _RNvCs._1a1b + +; Invalid namespace + +CHECK: _RNvN_NvC2ns1i01f + _RNvN_NvC2ns1i01f + ; Invalid identifier length (UINT64_MAX + 3, which happens to be ok after a wraparound). CHECK: _RNvC2ab18446744073709551618xy @@ -41,3 +173,23 @@ CHECK: _RNvC1a20abc _RNvC1a20abc + +; Invalid backreferences + +CHECK: _RB_ + _RB_ + +CHECK: _RB9_ + _RB9_ + +CHECK: _RB_1a + _RB_1a + +CHECK: _RB_RMRX + _RB_RMRX + +CHECK: _RNvNvB_1a1b1c + _RNvNvB_1a1b1c + +CHECK: _RINvCshGpAVYOtgW1_1a1fTlllEB_EB2_ + _RINvCshGpAVYOtgW1_1a1fTlllEB_EB2_