diff --git a/llvm/include/llvm/Demangle/RustDemangle.h b/llvm/include/llvm/Demangle/RustDemangle.h --- a/llvm/include/llvm/Demangle/RustDemangle.h +++ b/llvm/include/llvm/Demangle/RustDemangle.h @@ -28,6 +28,30 @@ bool empty() const { return Name.empty(); } }; +enum class BasicType { + Bool, + Char, + I8, + I16, + I32, + I64, + I128, + ISize, + U8, + U16, + U32, + U64, + U128, + USize, + F32, + F64, + Str, + Placeholder, + Unit, + Variadic, + Never, +}; + class Demangler { // Maximum recursion level. Used to avoid stack overflow. size_t MaxRecursionLevel; @@ -54,11 +78,14 @@ void demanglePath(); void demangleGenericArg(); void demangleType(); + void demangleConst(); + void demangleConstInt(); Identifier parseIdentifier(); uint64_t parseOptionalBase62Number(char Tag); uint64_t parseBase62Number(); uint64_t parseDecimalNumber(); + StringView parseHexNumber(); void print(char C) { if (Error) @@ -81,6 +108,8 @@ Output << N; } + void printBasicType(BasicType); + char look() const { if (Error || Position >= Input.size()) return 0; diff --git a/llvm/lib/Demangle/RustDemangle.cpp b/llvm/lib/Demangle/RustDemangle.cpp --- a/llvm/lib/Demangle/RustDemangle.cpp +++ b/llvm/lib/Demangle/RustDemangle.cpp @@ -12,6 +12,9 @@ //===----------------------------------------------------------------------===// #include "llvm/Demangle/RustDemangle.h" +#include "llvm/ADT/APInt.h" +#include "llvm/ADT/Optional.h" +#include "llvm/ADT/SmallString.h" #include "llvm/Demangle/Demangle.h" #include @@ -80,6 +83,10 @@ static inline bool isDigit(const char C) { return '0' <= C && C <= '9'; } +static inline bool isHexDigit(const char C) { + return ('0' <= C && C <= '9') || ('a' <= C && C <= 'f'); +} + static inline bool isLower(const char C) { return 'a' <= C && C <= 'z'; } static inline bool isUpper(const char C) { return 'A' <= C && C <= 'Z'; } @@ -200,39 +207,13 @@ // | "K" // = "L" void Demangler::demangleGenericArg() { - // FIXME parse remaining productions - demangleType(); + if (consumeIf('K')) + 
demangleConst(); + else + demangleType(); + // FIXME demangle lifetimes. } -static const char *const BasicTypes[] = { - "i8", // a - "bool", // b - "char", // c - "f64", // d - "str", // e - "f32", // f - nullptr, // g - "u8", // h - "isize", // i - "usize", // j - nullptr, // k - "i32", // l - "u32", // m - "i128", // n - "u128", // o - "_", // p - nullptr, // q - nullptr, // r - "i16", // s - "u16", // t - "()", // u - "...", // v - nullptr, // w - "i64", // x - "u64", // y - "!", // z -}; - // <basic-type> = "a" // i8 // | "b" // bool // | "c" // char @@ -254,10 +235,121 @@ // | "y" // u64 // | "z" // ! // | "p" // placeholder (e.g. for generic params), shown as _ -static const char *parseBasicType(char C) { - if (isLower(C)) - return BasicTypes[C - 'a']; - return nullptr; +static Optional<BasicType> parseBasicType(char C) { + switch (C) { + case 'a': + return BasicType::I8; + case 'b': + return BasicType::Bool; + case 'c': + return BasicType::Char; + case 'd': + return BasicType::F64; + case 'e': + return BasicType::Str; + case 'f': + return BasicType::F32; + case 'h': + return BasicType::U8; + case 'i': + return BasicType::ISize; + case 'j': + return BasicType::USize; + case 'l': + return BasicType::I32; + case 'm': + return BasicType::U32; + case 'n': + return BasicType::I128; + case 'o': + return BasicType::U128; + case 'p': + return BasicType::Placeholder; + case 's': + return BasicType::I16; + case 't': + return BasicType::U16; + case 'u': + return BasicType::Unit; + case 'v': + return BasicType::Variadic; + case 'x': + return BasicType::I64; + case 'y': + return BasicType::U64; + case 'z': + return BasicType::Never; + default: + return None; + } +} + +void Demangler::printBasicType(BasicType Type) { + switch (Type) { + case BasicType::Bool: + print("bool"); + break; + case BasicType::Char: + print("char"); + break; + case BasicType::I8: + print("i8"); + break; + case BasicType::I16: + print("i16"); + break; + case BasicType::I32: + print("i32"); + break; + case BasicType::I64: + 
print("i64"); + break; + case BasicType::I128: + print("i128"); + break; + case BasicType::ISize: + print("isize"); + break; + case BasicType::U8: + print("u8"); + break; + case BasicType::U16: + print("u16"); + break; + case BasicType::U32: + print("u32"); + break; + case BasicType::U64: + print("u64"); + break; + case BasicType::U128: + print("u128"); + break; + case BasicType::USize: + print("usize"); + break; + case BasicType::F32: + print("f32"); + break; + case BasicType::F64: + print("f64"); + break; + case BasicType::Str: + print("str"); + break; + case BasicType::Placeholder: + print("_"); + break; + case BasicType::Unit: + print("()"); + break; + case BasicType::Variadic: + print("..."); + break; + case BasicType::Never: + print("!"); + break; + } } // <type> = | <basic-type> @@ -273,14 +365,63 @@ // | "D" <dyn-bounds> <lifetime> // dyn Trait<Assoc = X> + Send + 'a // | <backref> // backref void Demangler::demangleType() { - if (const char *BasicType = parseBasicType(consume())) { - print(BasicType); + if (Optional<BasicType> Type = parseBasicType(consume())) + printBasicType(*Type); + else + Error = true; // FIXME parse remaining productions. +} + +// <const> = <basic-type> <const-data> +// | "p" // placeholder +// | <backref> +void Demangler::demangleConst() { + if (Optional<BasicType> Type = parseBasicType(consume())) { + switch (*Type) { + case BasicType::I8: + case BasicType::I16: + case BasicType::I32: + case BasicType::I64: + case BasicType::I128: + case BasicType::ISize: + case BasicType::U8: + case BasicType::U16: + case BasicType::U32: + case BasicType::U64: + case BasicType::U128: + case BasicType::USize: + demangleConstInt(); + break; + case BasicType::Placeholder: + print('_'); + break; + default: + // FIXME demangle backreferences, bool constants, and char constants. + Error = true; + break; + } } else { - // FIXME parse remaining productions. 
Error = true; } } +// <const-data> = ["n"] {<hex-digit>} "_" +void Demangler::demangleConstInt() { + if (consumeIf('n')) + print('-'); + + StringView HexDigits = parseHexNumber(); + if (Error || HexDigits.size() > 32) { + Error = true; + return; + } + + APInt Value(HexDigits.size() * 4 /* NumBits */, + StringRef(HexDigits.begin(), HexDigits.size()), 16 /* Radix */); + SmallString<40> Str; + Value.toString(Str, 10, false /* Signed */); + print(StringView(Str.begin(), Str.end())); +} + // <undisambiguated-identifier> = ["u"] <decimal-number> ["_"] <bytes> Identifier Demangler::parseIdentifier() { bool Punycode = consumeIf('u'); @@ -390,3 +531,30 @@ return Value; } + +// Parses a hexadecimal number with <0-9a-f> as digits. +// +// <hex-number> = "0_" +// | <1-9a-f> {<0-9a-f>} "_" +StringView Demangler::parseHexNumber() { + size_t Start = Position; + + if (!isHexDigit(look())) + Error = true; + + if (consumeIf('0')) { + if (!consumeIf('_')) + Error = true; + } else { + while (!Error && !consumeIf('_')) + if (!isHexDigit(consume())) + Error = true; + } + + if (Error) + return StringView(); + + size_t End = Position - 1; + assert(Start < End); + return Input.substr(Start, End - Start); +} diff --git a/llvm/test/Demangle/rust.test b/llvm/test/Demangle/rust.test --- a/llvm/test/Demangle/rust.test +++ b/llvm/test/Demangle/rust.test @@ -44,6 +44,11 @@ CHECK: generic::<_, _, _> _RIC7genericpppE +; Generic const arguments + +CHECK: generic_const::<_> + _RIC13generic_constKpE + ; Basic types CHECK: basic:: @@ -109,6 +114,87 @@ CHECK: basic::<!> _RIC5basiczE +; Integer constants. Test value demangling. 
+ +CHECK: integer::<0> + _RIC7integerKi0_E + +CHECK: integer::<1> + _RIC7integerKi1_E + +CHECK: integer::<-1> + _RIC7integerKin1_E + +CHECK: integer::<-15> + _RIC7integerKinf_E + +CHECK: integer::<-16> + _RIC7integerKin10_E + +CHECK: integer::<18446744073709551615> + _RIC7integerKoffffffffffffffff_E + +; Integer constant with maximum possible value: + +CHECK: integer::<340282366920938463463374607431768211455> + _RIC7integerKoffffffffffffffffffffffffffffffff_E + +; Integer constant with too many digits: + +CHECK: _RIC7integerKo123456789012345678901234567890123_E + _RIC7integerKo123456789012345678901234567890123_E + +; Invalid integer constant without any digits: + +CHECK: _RIC7integerKi_E + _RIC7integerKi_E + +; Invalid integer constants with insignificant leading zeros: + +CHECK: _RIC7integerKi00_E + _RIC7integerKi00_E + +CHECK: _RIC7integerKi01_E + _RIC7integerKi01_E + +; Integer constants. Test all integer types. + +CHECK: integer::<0, i8> + _RIC7integerKa0_aE + +CHECK: integer::<0, u8> + _RIC7integerKh0_hE + +CHECK: integer::<0, isize> + _RIC7integerKi0_iE + +CHECK: integer::<0, usize> + _RIC7integerKj0_jE + +CHECK: integer::<0, i32> + _RIC7integerKl0_lE + +CHECK: integer::<0, u32> + _RIC7integerKm0_mE + +CHECK: integer::<0, i128> + _RIC7integerKn0_nE + +CHECK: integer::<0, u128> + _RIC7integerKo0_oE + +CHECK: integer::<0, i16> + _RIC7integerKs0_sE + +CHECK: integer::<0, u16> + _RIC7integerKt0_tE + +CHECK: integer::<0, i64> + _RIC7integerKx0_xE + +CHECK: integer::<0, u64> + _RIC7integerKy0_yE + ; Invalid mangled characters CHECK: _RNvC2a.1c