Skip to content

Commit 65fd2fc

Browse files
author
Zachary Turner
committed Sep 22, 2016
[Support] Add StringRef::consumeInteger.
StringRef::getInteger() exists and treats the entire string as an integer of the specified radix, failing if any invalid characters are encountered or the number overflows. Sometimes you might have something like "123456foo" and you want to get the number 123456 and leave the string "foo" remaining. This is similar to what would be possible by using the standard runtime library functions strtoul et al and specifying an end pointer. This patch adds consumeInteger(), which does exactly that. It consumes as much as possible until an invalid character is found, and modifies the StringRef in place so that upon return only the portion of the StringRef after the number remains. Differential Revision: https://reviews.llvm.org/D24778 llvm-svn: 282164
1 parent 7f0e315 commit 65fd2fc

File tree

3 files changed

+267
-25
lines changed

3 files changed

+267
-25
lines changed
 

‎llvm/include/llvm/ADT/StringRef.h

+35
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,10 @@ namespace llvm {
3232

3333
bool getAsSignedInteger(StringRef Str, unsigned Radix, long long &Result);
3434

35+
bool consumeUnsignedInteger(StringRef &Str, unsigned Radix,
36+
unsigned long long &Result);
37+
bool consumeSignedInteger(StringRef &Str, unsigned Radix, long long &Result);
38+
3539
/// StringRef - Represent a constant reference to a string, i.e. a character
3640
/// array and a length, which need not be null terminated.
3741
///
@@ -397,6 +401,37 @@ namespace llvm {
397401
return false;
398402
}
399403

404+
/// Parse the current string as an integer of the specified radix. If
405+
/// \p Radix is specified as zero, this does radix autosensing using
406+
/// extended C rules: 0 is octal, 0x is hex, 0b is binary.
407+
///
408+
/// If the string does not begin with a number of the specified radix,
409+
/// this returns true to signify the error. The string is considered
410+
/// erroneous if empty or if it overflows T.
411+
/// The portion of the string representing the discovered numeric value
412+
/// is removed from the beginning of the string.
413+
template <typename T>
414+
typename std::enable_if<std::numeric_limits<T>::is_signed, bool>::type
415+
consumeInteger(unsigned Radix, T &Result) {
416+
long long LLVal;
417+
if (consumeSignedInteger(*this, Radix, LLVal) ||
418+
static_cast<long long>(static_cast<T>(LLVal)) != LLVal)
419+
return true;
420+
Result = LLVal;
421+
return false;
422+
}
423+
424+
template <typename T>
425+
typename std::enable_if<!std::numeric_limits<T>::is_signed, bool>::type
426+
consumeInteger(unsigned Radix, T &Result) {
427+
unsigned long long ULLVal;
428+
if (consumeUnsignedInteger(*this, Radix, ULLVal) ||
429+
static_cast<long long>(static_cast<T>(ULLVal)) != ULLVal)
430+
return true;
431+
Result = ULLVal;
432+
return false;
433+
}
434+
400435
/// Parse the current string as an integer of the specified \p Radix, or of
401436
/// an autosensed radix if the \p Radix given is 0. The current value in
402437
/// \p Result is discarded, and the storage is changed to be wide enough to

‎llvm/lib/Support/StringRef.cpp

+55-25
Original file line numberDiff line numberDiff line change
@@ -366,17 +366,16 @@ static unsigned GetAutoSenseRadix(StringRef &Str) {
366366
return 8;
367367
}
368368

369-
if (Str.startswith("0"))
369+
if (Str[0] == '0' && Str.size() > 1 && ascii_isdigit(Str[1])) {
370+
Str = Str.substr(1);
370371
return 8;
371-
372+
}
373+
372374
return 10;
373375
}
374376

375-
376-
/// GetAsUnsignedInteger - Workhorse method that converts a integer character
377-
/// sequence of radix up to 36 to an unsigned long long value.
378-
bool llvm::getAsUnsignedInteger(StringRef Str, unsigned Radix,
379-
unsigned long long &Result) {
377+
bool llvm::consumeUnsignedInteger(StringRef &Str, unsigned Radix,
378+
unsigned long long &Result) {
380379
// Autosense radix if not specified.
381380
if (Radix == 0)
382381
Radix = GetAutoSenseRadix(Str);
@@ -385,44 +384,51 @@ bool llvm::getAsUnsignedInteger(StringRef Str, unsigned Radix,
385384
if (Str.empty()) return true;
386385

387386
// Parse all the bytes of the string given this radix. Watch for overflow.
387+
StringRef Str2 = Str;
388388
Result = 0;
389-
while (!Str.empty()) {
389+
while (!Str2.empty()) {
390390
unsigned CharVal;
391-
if (Str[0] >= '0' && Str[0] <= '9')
392-
CharVal = Str[0]-'0';
393-
else if (Str[0] >= 'a' && Str[0] <= 'z')
394-
CharVal = Str[0]-'a'+10;
395-
else if (Str[0] >= 'A' && Str[0] <= 'Z')
396-
CharVal = Str[0]-'A'+10;
391+
if (Str2[0] >= '0' && Str2[0] <= '9')
392+
CharVal = Str2[0] - '0';
393+
else if (Str2[0] >= 'a' && Str2[0] <= 'z')
394+
CharVal = Str2[0] - 'a' + 10;
395+
else if (Str2[0] >= 'A' && Str2[0] <= 'Z')
396+
CharVal = Str2[0] - 'A' + 10;
397397
else
398-
return true;
398+
break;
399399

400-
// If the parsed value is larger than the integer radix, the string is
401-
// invalid.
400+
// If the parsed value is larger than the integer radix, we cannot
401+
// consume any more characters.
402402
if (CharVal >= Radix)
403-
return true;
403+
break;
404404

405405
// Add in this character.
406406
unsigned long long PrevResult = Result;
407-
Result = Result*Radix+CharVal;
407+
Result = Result * Radix + CharVal;
408408

409409
// Check for overflow by shifting back and seeing if bits were lost.
410-
if (Result/Radix < PrevResult)
410+
if (Result / Radix < PrevResult)
411411
return true;
412412

413-
Str = Str.substr(1);
413+
Str2 = Str2.substr(1);
414414
}
415415

416+
// We consider the operation a failure if no characters were consumed
417+
// successfully.
418+
if (Str.size() == Str2.size())
419+
return true;
420+
421+
Str = Str2;
416422
return false;
417423
}
418424

419-
bool llvm::getAsSignedInteger(StringRef Str, unsigned Radix,
420-
long long &Result) {
425+
bool llvm::consumeSignedInteger(StringRef &Str, unsigned Radix,
426+
long long &Result) {
421427
unsigned long long ULLVal;
422428

423429
// Handle positive strings first.
424430
if (Str.empty() || Str.front() != '-') {
425-
if (getAsUnsignedInteger(Str, Radix, ULLVal) ||
431+
if (consumeUnsignedInteger(Str, Radix, ULLVal) ||
426432
// Check for value so large it overflows a signed value.
427433
(long long)ULLVal < 0)
428434
return true;
@@ -431,17 +437,41 @@ bool llvm::getAsSignedInteger(StringRef Str, unsigned Radix,
431437
}
432438

433439
// Get the positive part of the value.
434-
if (getAsUnsignedInteger(Str.substr(1), Radix, ULLVal) ||
440+
StringRef Str2 = Str.drop_front(1);
441+
if (consumeUnsignedInteger(Str2, Radix, ULLVal) ||
435442
// Reject values so large they'd overflow as negative signed, but allow
436443
// "-0". This negates the unsigned so that the negative isn't undefined
437444
// on signed overflow.
438445
(long long)-ULLVal > 0)
439446
return true;
440447

448+
Str = Str2;
441449
Result = -ULLVal;
442450
return false;
443451
}
444452

453+
/// GetAsUnsignedInteger - Workhorse method that converts a integer character
454+
/// sequence of radix up to 36 to an unsigned long long value.
455+
bool llvm::getAsUnsignedInteger(StringRef Str, unsigned Radix,
456+
unsigned long long &Result) {
457+
if (consumeUnsignedInteger(Str, Radix, Result))
458+
return true;
459+
460+
// For getAsUnsignedInteger, we require the whole string to be consumed or
461+
// else we consider it a failure.
462+
return !Str.empty();
463+
}
464+
465+
/// getAsSignedInteger - Convert an entire integer character sequence to a
/// long long value. A \p Radix of zero selects radix autosensing.
///
/// This is a thin wrapper around consumeSignedInteger that additionally
/// insists on every character being part of the number.
///
/// \returns true on error (the parse failed or characters were left over
/// after the number), false on success.
bool llvm::getAsSignedInteger(StringRef Str, unsigned Radix,
                              long long &Result) {
  // Str is our own by-value copy, so consuming from it is invisible to the
  // caller.
  const bool ParseFailed = consumeSignedInteger(Str, Radix, Result);

  // Unlike the consume variant, leftover characters count as a failure.
  return ParseFailed || !Str.empty();
}
474+
445475
bool StringRef::getAsInteger(unsigned Radix, APInt &Result) const {
446476
StringRef Str = *this;
447477

‎llvm/unittests/ADT/StringRefTest.cpp

+177
Original file line numberDiff line numberDiff line change
@@ -590,6 +590,183 @@ TEST(StringRefTest, getAsUnsignedIntegerBadStrings) {
590590
}
591591
}
592592

593+
// Test vectors for the consumeIntegerUnsigned test below.
//   Str      - the input text handed to StringRef::consumeInteger.
//   Expected - the value the leading number should parse to.
//   Leftover - what should remain of the string after the number is consumed.
// The test parses every entry with Radix 0, so prefixed entries ("0x", "0b",
// leading "0") exercise radix autosensing. Expected is stored as uint64_t; the
// test decides per target width whether the value fits and a parse should
// succeed.
struct ConsumeUnsignedPair {
594+
const char *Str;
595+
uint64_t Expected;
596+
const char *Leftover;
597+
} ConsumeUnsigned[] = {
598+
{"0", 0, ""},
599+
{"255", 255, ""},
600+
{"256", 256, ""},
601+
{"65535", 65535, ""},
602+
{"65536", 65536, ""},
603+
{"4294967295", 4294967295ULL, ""},
604+
{"4294967296", 4294967296ULL, ""},
605+
{"255A376", 255, "A376"},
606+
{"18446744073709551615", 18446744073709551615ULL, ""},
607+
{"18446744073709551615ABC", 18446744073709551615ULL, "ABC"},
608+
{"042", 34, ""},
609+
{"0x42", 66, ""},
610+
{"0x42-0x34", 66, "-0x34"},
611+
{"0b101010", 42, ""},
612+
{"0429F", 042, "9F"}, // Auto-sensed octal radix, invalid digit
613+
{"0x42G12", 0x42, "G12"}, // Auto-sensed hex radix, invalid digit
614+
{"0b10101020101", 42, "20101"}}; // Auto-sensed binary radix, invalid digit.
615+
616+
// Test vectors for the consumeIntegerSigned test below.
//   Str      - the input text handed to StringRef::consumeInteger.
//   Expected - the value the leading number should parse to.
//   Leftover - what should remain of the string after the number is consumed.
// The test parses every entry with Radix 0 (radix autosensing). Values sit on
// either side of the 8-, 16- and 32-bit signed limits so that each width sees
// both a fitting and a non-fitting case; the "-1" suffixed entries check that
// a '-' after the number stops consumption rather than being parsed.
// The 64-bit minimum is written as -(9223372036854775807LL) - 1 because the
// literal 9223372036854775808 itself does not fit in a signed 64-bit type.
struct ConsumeSignedPair {
617+
const char *Str;
618+
int64_t Expected;
619+
const char *Leftover;
620+
} ConsumeSigned[] = {
621+
{"0", 0, ""},
622+
{"-0", 0, ""},
623+
{"0-1", 0, "-1"},
624+
{"-0-1", 0, "-1"},
625+
{"127", 127, ""},
626+
{"128", 128, ""},
627+
{"127-1", 127, "-1"},
628+
{"128-1", 128, "-1"},
629+
{"-128", -128, ""},
630+
{"-129", -129, ""},
631+
{"-128-1", -128, "-1"},
632+
{"-129-1", -129, "-1"},
633+
{"32767", 32767, ""},
634+
{"32768", 32768, ""},
635+
{"32767-1", 32767, "-1"},
636+
{"32768-1", 32768, "-1"},
637+
{"-32768", -32768, ""},
638+
{"-32769", -32769, ""},
639+
{"-32768-1", -32768, "-1"},
640+
{"-32769-1", -32769, "-1"},
641+
{"2147483647", 2147483647LL, ""},
642+
{"2147483648", 2147483648LL, ""},
643+
{"2147483647-1", 2147483647LL, "-1"},
644+
{"2147483648-1", 2147483648LL, "-1"},
645+
{"-2147483648", -2147483648LL, ""},
646+
{"-2147483649", -2147483649LL, ""},
647+
{"-2147483648-1", -2147483648LL, "-1"},
648+
{"-2147483649-1", -2147483649LL, "-1"},
649+
{"-9223372036854775808", -(9223372036854775807LL) - 1, ""},
650+
{"-9223372036854775808-1", -(9223372036854775807LL) - 1, "-1"},
651+
{"042", 34, ""},
652+
{"042-1", 34, "-1"},
653+
{"0x42", 66, ""},
654+
{"0x42-1", 66, "-1"},
655+
{"0b101010", 42, ""},
656+
{"0b101010-1", 42, "-1"},
657+
{"-042", -34, ""},
658+
{"-042-1", -34, "-1"},
659+
{"-0x42", -66, ""},
660+
{"-0x42-1", -66, "-1"},
661+
{"-0b101010", -42, ""},
662+
{"-0b101010-1", -42, "-1"}};
663+
664+
// Runs every ConsumeUnsigned vector through consumeInteger with an
// auto-sensed radix, once per unsigned width. When the expected value fits
// the width, the parse must succeed and yield both the value and the expected
// leftover text; otherwise the parse must report an error.
TEST(StringRefTest, consumeIntegerUnsigned) {
  uint8_t U8;
  uint16_t U16;
  uint32_t U32;
  uint64_t U64;

  for (const ConsumeUnsignedPair &Case : ConsumeUnsigned) {
    // consumeInteger returns true on error, hence "Failed".
    StringRef Str = Case.Str;
    bool Failed = Str.consumeInteger(0, U8);
    if (static_cast<uint8_t>(Case.Expected) != Case.Expected) {
      ASSERT_TRUE(Failed);
    } else {
      ASSERT_FALSE(Failed);
      EXPECT_EQ(U8, Case.Expected);
      EXPECT_EQ(Str, Case.Leftover);
    }

    Str = Case.Str;
    Failed = Str.consumeInteger(0, U16);
    if (static_cast<uint16_t>(Case.Expected) != Case.Expected) {
      ASSERT_TRUE(Failed);
    } else {
      ASSERT_FALSE(Failed);
      EXPECT_EQ(U16, Case.Expected);
      EXPECT_EQ(Str, Case.Leftover);
    }

    Str = Case.Str;
    Failed = Str.consumeInteger(0, U32);
    if (static_cast<uint32_t>(Case.Expected) != Case.Expected) {
      ASSERT_TRUE(Failed);
    } else {
      ASSERT_FALSE(Failed);
      EXPECT_EQ(U32, Case.Expected);
      EXPECT_EQ(Str, Case.Leftover);
    }

    // Expected is itself 64 bits wide, so this width always takes the
    // success path; the check is kept for symmetry with the narrower widths.
    Str = Case.Str;
    Failed = Str.consumeInteger(0, U64);
    if (static_cast<uint64_t>(Case.Expected) != Case.Expected) {
      ASSERT_TRUE(Failed);
    } else {
      ASSERT_FALSE(Failed);
      EXPECT_EQ(U64, Case.Expected);
      EXPECT_EQ(Str, Case.Leftover);
    }
  }
}
716+
717+
// Runs every ConsumeSigned vector through consumeInteger with an auto-sensed
// radix, once per signed width. When the expected value fits the width, the
// parse must succeed and yield both the value and the expected leftover text;
// otherwise the parse must report an error.
TEST(StringRefTest, consumeIntegerSigned) {
  int8_t S8;
  int16_t S16;
  int32_t S32;
  int64_t S64;

  for (const ConsumeSignedPair &Case : ConsumeSigned) {
    // consumeInteger returns true on error, hence "Failed".
    StringRef Str = Case.Str;
    bool Failed = Str.consumeInteger(0, S8);
    if (static_cast<int8_t>(Case.Expected) != Case.Expected) {
      ASSERT_TRUE(Failed);
    } else {
      ASSERT_FALSE(Failed);
      EXPECT_EQ(S8, Case.Expected);
      EXPECT_EQ(Str, Case.Leftover);
    }

    Str = Case.Str;
    Failed = Str.consumeInteger(0, S16);
    if (static_cast<int16_t>(Case.Expected) != Case.Expected) {
      ASSERT_TRUE(Failed);
    } else {
      ASSERT_FALSE(Failed);
      EXPECT_EQ(S16, Case.Expected);
      EXPECT_EQ(Str, Case.Leftover);
    }

    Str = Case.Str;
    Failed = Str.consumeInteger(0, S32);
    if (static_cast<int32_t>(Case.Expected) != Case.Expected) {
      ASSERT_TRUE(Failed);
    } else {
      ASSERT_FALSE(Failed);
      EXPECT_EQ(S32, Case.Expected);
      EXPECT_EQ(Str, Case.Leftover);
    }

    // Expected is itself 64 bits wide, so this width always takes the
    // success path; the check is kept for symmetry with the narrower widths.
    Str = Case.Str;
    Failed = Str.consumeInteger(0, S64);
    if (static_cast<int64_t>(Case.Expected) != Case.Expected) {
      ASSERT_TRUE(Failed);
    } else {
      ASSERT_FALSE(Failed);
      EXPECT_EQ(S64, Case.Expected);
      EXPECT_EQ(Str, Case.Leftover);
    }
  }
}
769+
593770
static const char *join_input[] = { "a", "b", "c" };
594771
static const char join_result1[] = "a";
595772
static const char join_result2[] = "a:b:c";

0 commit comments

Comments
 (0)
Please sign in to comment.