Index: llvm/trunk/include/llvm/ADT/StringRef.h =================================================================== --- llvm/trunk/include/llvm/ADT/StringRef.h +++ llvm/trunk/include/llvm/ADT/StringRef.h @@ -32,6 +32,10 @@ bool getAsSignedInteger(StringRef Str, unsigned Radix, long long &Result); + bool consumeUnsignedInteger(StringRef &Str, unsigned Radix, + unsigned long long &Result); + bool consumeSignedInteger(StringRef &Str, unsigned Radix, long long &Result); + /// StringRef - Represent a constant reference to a string, i.e. a character /// array and a length, which need not be null terminated. /// @@ -397,6 +401,37 @@ return false; } + /// Parse the current string as an integer of the specified radix. If + /// \p Radix is specified as zero, this does radix autosensing using + /// extended C rules: 0 is octal, 0x is hex, 0b is binary. + /// + /// If the string does not begin with a number of the specified radix, + /// this returns true to signify the error. The string is considered + /// erroneous if empty or if it overflows T. + /// The portion of the string representing the discovered numeric value + /// is removed from the beginning of the string. + template + typename std::enable_if::is_signed, bool>::type + consumeInteger(unsigned Radix, T &Result) { + long long LLVal; + if (consumeSignedInteger(*this, Radix, LLVal) || + static_cast(static_cast(LLVal)) != LLVal) + return true; + Result = LLVal; + return false; + } + + template + typename std::enable_if::is_signed, bool>::type + consumeInteger(unsigned Radix, T &Result) { + unsigned long long ULLVal; + if (consumeUnsignedInteger(*this, Radix, ULLVal) || + static_cast(static_cast(ULLVal)) != ULLVal) + return true; + Result = ULLVal; + return false; + } + /// Parse the current string as an integer of the specified \p Radix, or of /// an autosensed radix if the \p Radix given is 0. 
The current value in /// \p Result is discarded, and the storage is changed to be wide enough to Index: llvm/trunk/lib/Support/StringRef.cpp =================================================================== --- llvm/trunk/lib/Support/StringRef.cpp +++ llvm/trunk/lib/Support/StringRef.cpp @@ -366,17 +366,16 @@ return 8; } - if (Str.startswith("0")) + if (Str[0] == '0' && Str.size() > 1 && ascii_isdigit(Str[1])) { + Str = Str.substr(1); return 8; - + } + return 10; } - -/// GetAsUnsignedInteger - Workhorse method that converts a integer character -/// sequence of radix up to 36 to an unsigned long long value. -bool llvm::getAsUnsignedInteger(StringRef Str, unsigned Radix, - unsigned long long &Result) { +bool llvm::consumeUnsignedInteger(StringRef &Str, unsigned Radix, + unsigned long long &Result) { // Autosense radix if not specified. if (Radix == 0) Radix = GetAutoSenseRadix(Str); @@ -385,44 +384,51 @@ if (Str.empty()) return true; // Parse all the bytes of the string given this radix. Watch for overflow. + StringRef Str2 = Str; Result = 0; - while (!Str.empty()) { + while (!Str2.empty()) { unsigned CharVal; - if (Str[0] >= '0' && Str[0] <= '9') - CharVal = Str[0]-'0'; - else if (Str[0] >= 'a' && Str[0] <= 'z') - CharVal = Str[0]-'a'+10; - else if (Str[0] >= 'A' && Str[0] <= 'Z') - CharVal = Str[0]-'A'+10; + if (Str2[0] >= '0' && Str2[0] <= '9') + CharVal = Str2[0] - '0'; + else if (Str2[0] >= 'a' && Str2[0] <= 'z') + CharVal = Str2[0] - 'a' + 10; + else if (Str2[0] >= 'A' && Str2[0] <= 'Z') + CharVal = Str2[0] - 'A' + 10; else - return true; + break; - // If the parsed value is larger than the integer radix, the string is - // invalid. + // If the parsed value is larger than the integer radix, we cannot + // consume any more characters. if (CharVal >= Radix) - return true; + break; // Add in this character. 
unsigned long long PrevResult = Result; - Result = Result*Radix+CharVal; + Result = Result * Radix + CharVal; // Check for overflow by shifting back and seeing if bits were lost. - if (Result/Radix < PrevResult) + if (Result / Radix < PrevResult) return true; - Str = Str.substr(1); + Str2 = Str2.substr(1); } + // We consider the operation a failure if no characters were consumed + // successfully. + if (Str.size() == Str2.size()) + return true; + + Str = Str2; return false; } -bool llvm::getAsSignedInteger(StringRef Str, unsigned Radix, - long long &Result) { +bool llvm::consumeSignedInteger(StringRef &Str, unsigned Radix, + long long &Result) { unsigned long long ULLVal; // Handle positive strings first. if (Str.empty() || Str.front() != '-') { - if (getAsUnsignedInteger(Str, Radix, ULLVal) || + if (consumeUnsignedInteger(Str, Radix, ULLVal) || // Check for value so large it overflows a signed value. (long long)ULLVal < 0) return true; @@ -431,17 +437,41 @@ } // Get the positive part of the value. - if (getAsUnsignedInteger(Str.substr(1), Radix, ULLVal) || + StringRef Str2 = Str.drop_front(1); + if (consumeUnsignedInteger(Str2, Radix, ULLVal) || // Reject values so large they'd overflow as negative signed, but allow // "-0". This negates the unsigned so that the negative isn't undefined // on signed overflow. (long long)-ULLVal > 0) return true; + Str = Str2; Result = -ULLVal; return false; } +/// GetAsUnsignedInteger - Workhorse method that converts an integer character +/// sequence of radix up to 36 to an unsigned long long value. +bool llvm::getAsUnsignedInteger(StringRef Str, unsigned Radix, + unsigned long long &Result) { + if (consumeUnsignedInteger(Str, Radix, Result)) + return true; + + // For getAsUnsignedInteger, we require the whole string to be consumed or + // else we consider it a failure. 
+ return !Str.empty(); +} + +bool llvm::getAsSignedInteger(StringRef Str, unsigned Radix, + long long &Result) { + if (consumeSignedInteger(Str, Radix, Result)) + return true; + + // For getAsSignedInteger, we require the whole string to be consumed or else + // we consider it a failure. + return !Str.empty(); +} + bool StringRef::getAsInteger(unsigned Radix, APInt &Result) const { StringRef Str = *this; Index: llvm/trunk/unittests/ADT/StringRefTest.cpp =================================================================== --- llvm/trunk/unittests/ADT/StringRefTest.cpp +++ llvm/trunk/unittests/ADT/StringRefTest.cpp @@ -590,6 +590,183 @@ } } +struct ConsumeUnsignedPair { + const char *Str; + uint64_t Expected; + const char *Leftover; +} ConsumeUnsigned[] = { + {"0", 0, ""}, + {"255", 255, ""}, + {"256", 256, ""}, + {"65535", 65535, ""}, + {"65536", 65536, ""}, + {"4294967295", 4294967295ULL, ""}, + {"4294967296", 4294967296ULL, ""}, + {"255A376", 255, "A376"}, + {"18446744073709551615", 18446744073709551615ULL, ""}, + {"18446744073709551615ABC", 18446744073709551615ULL, "ABC"}, + {"042", 34, ""}, + {"0x42", 66, ""}, + {"0x42-0x34", 66, "-0x34"}, + {"0b101010", 42, ""}, + {"0429F", 042, "9F"}, // Auto-sensed octal radix, invalid digit + {"0x42G12", 0x42, "G12"}, // Auto-sensed hex radix, invalid digit + {"0b10101020101", 42, "20101"}}; // Auto-sensed binary radix, invalid digit. 
+ +struct ConsumeSignedPair { + const char *Str; + int64_t Expected; + const char *Leftover; +} ConsumeSigned[] = { + {"0", 0, ""}, + {"-0", 0, ""}, + {"0-1", 0, "-1"}, + {"-0-1", 0, "-1"}, + {"127", 127, ""}, + {"128", 128, ""}, + {"127-1", 127, "-1"}, + {"128-1", 128, "-1"}, + {"-128", -128, ""}, + {"-129", -129, ""}, + {"-128-1", -128, "-1"}, + {"-129-1", -129, "-1"}, + {"32767", 32767, ""}, + {"32768", 32768, ""}, + {"32767-1", 32767, "-1"}, + {"32768-1", 32768, "-1"}, + {"-32768", -32768, ""}, + {"-32769", -32769, ""}, + {"-32768-1", -32768, "-1"}, + {"-32769-1", -32769, "-1"}, + {"2147483647", 2147483647LL, ""}, + {"2147483648", 2147483648LL, ""}, + {"2147483647-1", 2147483647LL, "-1"}, + {"2147483648-1", 2147483648LL, "-1"}, + {"-2147483648", -2147483648LL, ""}, + {"-2147483649", -2147483649LL, ""}, + {"-2147483648-1", -2147483648LL, "-1"}, + {"-2147483649-1", -2147483649LL, "-1"}, + {"-9223372036854775808", -(9223372036854775807LL) - 1, ""}, + {"-9223372036854775808-1", -(9223372036854775807LL) - 1, "-1"}, + {"042", 34, ""}, + {"042-1", 34, "-1"}, + {"0x42", 66, ""}, + {"0x42-1", 66, "-1"}, + {"0b101010", 42, ""}, + {"0b101010-1", 42, "-1"}, + {"-042", -34, ""}, + {"-042-1", -34, "-1"}, + {"-0x42", -66, ""}, + {"-0x42-1", -66, "-1"}, + {"-0b101010", -42, ""}, + {"-0b101010-1", -42, "-1"}}; + +TEST(StringRefTest, consumeIntegerUnsigned) { + uint8_t U8; + uint16_t U16; + uint32_t U32; + uint64_t U64; + + for (size_t i = 0; i < array_lengthof(ConsumeUnsigned); ++i) { + StringRef Str = ConsumeUnsigned[i].Str; + bool U8Success = Str.consumeInteger(0, U8); + if (static_cast(ConsumeUnsigned[i].Expected) == + ConsumeUnsigned[i].Expected) { + ASSERT_FALSE(U8Success); + EXPECT_EQ(U8, ConsumeUnsigned[i].Expected); + EXPECT_EQ(Str, ConsumeUnsigned[i].Leftover); + } else { + ASSERT_TRUE(U8Success); + } + + Str = ConsumeUnsigned[i].Str; + bool U16Success = Str.consumeInteger(0, U16); + if (static_cast(ConsumeUnsigned[i].Expected) == + ConsumeUnsigned[i].Expected) { + 
ASSERT_FALSE(U16Success); + EXPECT_EQ(U16, ConsumeUnsigned[i].Expected); + EXPECT_EQ(Str, ConsumeUnsigned[i].Leftover); + } else { + ASSERT_TRUE(U16Success); + } + + Str = ConsumeUnsigned[i].Str; + bool U32Success = Str.consumeInteger(0, U32); + if (static_cast(ConsumeUnsigned[i].Expected) == + ConsumeUnsigned[i].Expected) { + ASSERT_FALSE(U32Success); + EXPECT_EQ(U32, ConsumeUnsigned[i].Expected); + EXPECT_EQ(Str, ConsumeUnsigned[i].Leftover); + } else { + ASSERT_TRUE(U32Success); + } + + Str = ConsumeUnsigned[i].Str; + bool U64Success = Str.consumeInteger(0, U64); + if (static_cast(ConsumeUnsigned[i].Expected) == + ConsumeUnsigned[i].Expected) { + ASSERT_FALSE(U64Success); + EXPECT_EQ(U64, ConsumeUnsigned[i].Expected); + EXPECT_EQ(Str, ConsumeUnsigned[i].Leftover); + } else { + ASSERT_TRUE(U64Success); + } + } +} + +TEST(StringRefTest, consumeIntegerSigned) { + int8_t S8; + int16_t S16; + int32_t S32; + int64_t S64; + + for (size_t i = 0; i < array_lengthof(ConsumeSigned); ++i) { + StringRef Str = ConsumeSigned[i].Str; + bool S8Success = Str.consumeInteger(0, S8); + if (static_cast(ConsumeSigned[i].Expected) == + ConsumeSigned[i].Expected) { + ASSERT_FALSE(S8Success); + EXPECT_EQ(S8, ConsumeSigned[i].Expected); + EXPECT_EQ(Str, ConsumeSigned[i].Leftover); + } else { + ASSERT_TRUE(S8Success); + } + + Str = ConsumeSigned[i].Str; + bool S16Success = Str.consumeInteger(0, S16); + if (static_cast(ConsumeSigned[i].Expected) == + ConsumeSigned[i].Expected) { + ASSERT_FALSE(S16Success); + EXPECT_EQ(S16, ConsumeSigned[i].Expected); + EXPECT_EQ(Str, ConsumeSigned[i].Leftover); + } else { + ASSERT_TRUE(S16Success); + } + + Str = ConsumeSigned[i].Str; + bool S32Success = Str.consumeInteger(0, S32); + if (static_cast(ConsumeSigned[i].Expected) == + ConsumeSigned[i].Expected) { + ASSERT_FALSE(S32Success); + EXPECT_EQ(S32, ConsumeSigned[i].Expected); + EXPECT_EQ(Str, ConsumeSigned[i].Leftover); + } else { + ASSERT_TRUE(S32Success); + } + + Str = ConsumeSigned[i].Str; + bool 
S64Success = Str.consumeInteger(0, S64); + if (static_cast(ConsumeSigned[i].Expected) == + ConsumeSigned[i].Expected) { + ASSERT_FALSE(S64Success); + EXPECT_EQ(S64, ConsumeSigned[i].Expected); + EXPECT_EQ(Str, ConsumeSigned[i].Leftover); + } else { + ASSERT_TRUE(S64Success); + } + } +} + static const char *join_input[] = { "a", "b", "c" }; static const char join_result1[] = "a"; static const char join_result2[] = "a:b:c";