diff --git a/llvm/include/llvm/Support/DataExtractor.h b/llvm/include/llvm/Support/DataExtractor.h
--- a/llvm/include/llvm/Support/DataExtractor.h
+++ b/llvm/include/llvm/Support/DataExtractor.h
@@ -141,6 +141,59 @@
   /// a default-initialized StringRef will be returned.
   StringRef getCStrRef(uint64_t *offset_ptr) const;
 
+  /// Extract a fixed length string from \a *OffsetPtr and consume \a Length
+  /// bytes.
+  ///
+  /// Returns a StringRef for the string from the data at the offset
+  /// pointed to by \a OffsetPtr, with any trailing \a TrimChars characters
+  /// removed. The \a OffsetPtr is advanced by the full \a Length bytes.
+  ///
+  /// \param[in,out] OffsetPtr
+  ///     A pointer to an offset within the data that will be advanced
+  ///     by \a Length bytes if the value is extracted correctly. If the
+  ///     offset is out of bounds or there are not enough bytes to extract
+  ///     this value, the offset will be left unmodified.
+  ///
+  /// \param[in] Length
+  ///     The length of the fixed length string to extract. If there are not
+  ///     enough bytes in the data to extract the full string, the offset will
+  ///     be left unmodified.
+  ///
+  /// \param[in] TrimChars
+  ///     A set of characters to trim from the end of the string. Fixed length
+  ///     strings are commonly padded with one or more zero bytes, so the
+  ///     default trims NULL characters. Clients whose strings are padded
+  ///     with other characters, such as spaces, can pass those instead.
+  ///
+  /// \return
+  ///     The extracted string without its trailing \a TrimChars. If the
+  ///     offset pointed to by \a OffsetPtr is out of bounds, or if the offset
+  ///     plus \a Length is out of bounds, None is returned.
+  Optional<StringRef> getFixedLengthString(uint64_t *OffsetPtr,
+      uint64_t Length, StringRef TrimChars = {"\0", 1}) const;
+
+  /// Extract a fixed number of bytes from the specified offset.
+  ///
+  /// Returns a StringRef for the bytes from the data at the offset
+  /// pointed to by \a OffsetPtr. Exactly \a Length bytes are extracted,
+  /// unmodified, and the \a OffsetPtr will be advanced by \a Length bytes.
+  ///
+  /// \param[in,out] OffsetPtr
+  ///     A pointer to an offset within the data that will be advanced
+  ///     by \a Length bytes if the value is extracted correctly. If the
+  ///     offset is out of bounds or there are not enough bytes to extract
+  ///     this value, the offset will be left unmodified.
+  ///
+  /// \param[in] Length
+  ///     The number of bytes to extract. If there are not enough bytes in the
+  ///     data to extract all of the bytes, the offset will be left unmodified.
+  ///
+  /// \return
+  ///     A StringRef for the extracted bytes. If the offset pointed to by
+  ///     \a OffsetPtr is out of bounds, or if the offset plus the length
+  ///     is out of bounds, None is returned.
+  Optional<StringRef> getBytes(uint64_t *OffsetPtr, uint64_t Length) const;
+
   /// Extract an unsigned integer of size \a byte_size from \a
   /// *offset_ptr.
   ///
diff --git a/llvm/lib/Support/DataExtractor.cpp b/llvm/lib/Support/DataExtractor.cpp
--- a/llvm/lib/Support/DataExtractor.cpp
+++ b/llvm/lib/Support/DataExtractor.cpp
@@ -171,6 +171,26 @@
   return StringRef();
 }
 
+Optional<StringRef> DataExtractor::getFixedLengthString(uint64_t *OffsetPtr,
+    uint64_t Length, StringRef TrimChars) const {
+  // getBytes() performs the bounds check and advances the offset by the full
+  // field width. Padding is stripped from the end only, as documented; any
+  // leading TrimChars are part of the value.
+  if (Optional<StringRef> Bytes = getBytes(OffsetPtr, Length))
+    return Bytes->rtrim(TrimChars);
+  return llvm::None;
+}
+
+Optional<StringRef> DataExtractor::getBytes(uint64_t *OffsetPtr,
+                                            uint64_t Length) const {
+  // Bail out without touching *OffsetPtr when the range check fails.
+  if (!isValidOffsetForDataOfSize(*OffsetPtr, Length))
+    return llvm::None;
+  StringRef Result = Data.substr(*OffsetPtr, Length);
+  *OffsetPtr += Length;
+  return Result;
+}
+
 uint64_t DataExtractor::getULEB128(uint64_t *offset_ptr,
                                    llvm::Error *Err) const {
   assert(*offset_ptr <= Data.size());
diff --git a/llvm/unittests/Support/DataExtractorTest.cpp b/llvm/unittests/Support/DataExtractorTest.cpp
--- a/llvm/unittests/Support/DataExtractorTest.cpp
+++ b/llvm/unittests/Support/DataExtractorTest.cpp
@@ -278,4 +278,67 @@
   DataExtractor DE2(ArrayRef(Data), false, 8);
   EXPECT_EQ(DE2.size(), sizeof(Data));
 }
+
+TEST(DataExtractorTest, FixedLengthString) {
+  // Two 8 byte fields ("hello" + 3 NULLs and "world" + 2 spaces + 1 tab)
+  // followed by the NULL terminated C string "hola".
+  const char Data[] = "hello\x00\x00\x00world  \thola\x00";
+  DataExtractor DE(StringRef(Data, sizeof(Data)-1), false, 8);
+  uint64_t Offset = 0;
+  Optional<StringRef> Str;
+  // Test extracting too many bytes doesn't modify Offset and returns None.
+  Str = DE.getFixedLengthString(&Offset, sizeof(Data));
+  EXPECT_FALSE(Str.hasValue());
+  EXPECT_EQ(Offset, 0u);
+
+  // Test extracting a fixed width C string with trailing NULL characters.
+  Str = DE.getFixedLengthString(&Offset, 8);
+  EXPECT_EQ(Offset, 8u);
+  // ASSERT, not EXPECT: the checks below dereference Str.
+  ASSERT_TRUE(Str.hasValue());
+  EXPECT_EQ(Str->size(), 5u);
+  EXPECT_EQ(*Str, "hello");
+  // Test extracting a fixed width C string with trailing space and tab
+  // characters.
+  Str = DE.getFixedLengthString(&Offset, 8, " \t");
+  EXPECT_EQ(Offset, 16u);
+  ASSERT_TRUE(Str.hasValue());
+  EXPECT_EQ(Str->size(), 5u);
+  EXPECT_EQ(*Str, "world");
+  // Now extract a normal C string.
+  Str = DE.getCStrRef(&Offset);
+  EXPECT_EQ(Str->size(), 4u);
+  EXPECT_EQ(*Str, "hola");
+}
+
+TEST(DataExtractorTest, FixedLengthStringKeepsLeadingTrimChars) {
+  // Only trailing padding is stripped; leading TrimChars belong to the value.
+  const char Data[] = "  ab  ";
+  DataExtractor DE(StringRef(Data, sizeof(Data)-1), false, 8);
+  uint64_t Offset = 0;
+  Optional<StringRef> Str = DE.getFixedLengthString(&Offset, 6, " ");
+  EXPECT_EQ(Offset, 6u);
+  ASSERT_TRUE(Str.hasValue());
+  EXPECT_EQ(*Str, "  ab");
+}
+
+TEST(DataExtractorTest, GetBytes) {
+  // Use data with an embedded NULL character for good measure.
+  const char Data[] = "\x01\x02\x00\x04";
+  StringRef Bytes(Data, sizeof(Data)-1);
+  DataExtractor DE(Bytes, false, 8);
+  uint64_t Offset = 0;
+  Optional<StringRef> Str;
+  // Test extracting too many bytes doesn't modify Offset and returns None.
+  Str = DE.getBytes(&Offset, sizeof(Data));
+  EXPECT_FALSE(Str.hasValue());
+  EXPECT_EQ(Offset, 0u);
+  // Test extracting 4 bytes from the stream.
+  Str = DE.getBytes(&Offset, 4);
+  EXPECT_EQ(Offset, 4u);
+  ASSERT_TRUE(Str.hasValue());
+  EXPECT_EQ(Str->size(), 4u);
+  EXPECT_EQ(*Str, Bytes);
+}
+
 }