Index: llvm/docs/CodeGenerator.rst =================================================================== --- llvm/docs/CodeGenerator.rst +++ llvm/docs/CodeGenerator.rst @@ -1597,6 +1597,10 @@ * a magic number: "REMARKS\0" * the version number: a little-endian uint64_t +* the string table: + * the total size of the string table (the size itself excluded): + little-endian uint64_t + * a list of null-terminated strings * the absolute file path to the serialized remark diagnostics: a null-terminated string. Index: llvm/include/llvm/IR/RemarkStreamer.h =================================================================== --- llvm/include/llvm/IR/RemarkStreamer.h +++ llvm/include/llvm/IR/RemarkStreamer.h @@ -14,10 +14,11 @@ #define LLVM_IR_REMARKSTREAMER_H #include "llvm/IR/DiagnosticInfo.h" +#include "llvm/Remarks/RemarkStringTable.h" #include "llvm/Support/Error.h" +#include "llvm/Support/Regex.h" #include "llvm/Support/YAMLTraits.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Support/Regex.h" #include #include @@ -34,6 +35,11 @@ /// The YAML streamer. yaml::Output YAMLOutput; + /// The string table containing all the unique strings used in the output. + /// The table will be serialized in a section to be consumed after the + /// compilation. + remarks::StringTable StrTab; + public: RemarkStreamer(StringRef Filename, raw_ostream& OS); /// Return the filename that the remark diagnostics are emitted to. @@ -45,6 +51,9 @@ Error setFilter(StringRef Filter); /// Emit a diagnostic through the streamer. void emit(const DiagnosticInfoOptimizationBase &Diag); + /// The string table used during emission. + remarks::StringTable &getStringTable() { return StrTab; } + const remarks::StringTable &getStringTable() const { return StrTab; } }; } // end namespace llvm Index: llvm/include/llvm/Remarks/RemarkParser.h =================================================================== --- llvm/include/llvm/Remarks/RemarkParser.h +++ llvm/include/llvm/Remarks/RemarkParser.h @@ -13,6 +13,7 @@ #ifndef LLVM_REMARKS_REMARK_PARSER_H #define LLVM_REMARKS_REMARK_PARSER_H +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" #include "llvm/Remarks/Remark.h" #include "llvm/Support/Error.h" @@ -32,6 +33,11 @@ /// This constructor should be only used for parsing YAML remarks. Parser(StringRef Buffer); + /// Create a parser parsing \p Buffer to Remark objects, using \p StrTabBuf as + /// string table. + /// This constructor should be only used for parsing YAML remarks. + Parser(StringRef Buffer, StringRef StrTabBuf); + // Needed because ParserImpl is an incomplete type. ~Parser(); @@ -40,6 +46,18 @@ Expected getNext() const; }; +/// In-memory representation of the string table parsed from a buffer (e.g. the +/// remarks section). +struct ParsedStringTable { + /// The buffer mapped from the section contents. + StringRef Buffer; + /// Collection of offsets in the buffer for each string entry. + SmallVector Offsets; + + Expected operator[](size_t Index); + ParsedStringTable(StringRef Buffer); +}; + } // end namespace remarks } // end namespace llvm Index: llvm/include/llvm/Remarks/RemarkStringTable.h =================================================================== --- /dev/null +++ llvm/include/llvm/Remarks/RemarkStringTable.h @@ -0,0 +1,62 @@ +//===-- RemarkStringTable.h - Serializing string table ----------*- C++/-*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This class is used to deduplicate and serialize a string table used for +// generating remarks. +// +// For parsing a string table, use ParsedStringTable in RemarkParser.h +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_REMARKS_REMARK_STRING_TABLE_H +#define LLVM_REMARKS_REMARK_STRING_TABLE_H + +#include "llvm/ADT/StringMap.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/Allocator.h" +#include + +namespace llvm { + +class raw_ostream; + +namespace remarks { + +/// The string table used for serializing remarks. +/// This table can be for example serialized in a section to be consumed after +/// the compilation. +struct StringTable { + /// Allocator holding all the memory used by the map. + BumpPtrAllocator Allocator; + /// The string table containing all the unique strings used in the output. + /// It maps a string to an unique ID. + StringMap StrTab; + /// Total size of the string table when serialized. + size_t SerializedSize = 0; + + StringTable() : Allocator(), StrTab(Allocator) {} + /// Add a string to the table. It returns an unique ID of the string. + std::pair add(StringRef Str); + /// It returns an unique ID of the string. If the string is not in the table, + /// the behavior is undefined. + unsigned get(StringRef Str) const; + /// Serialize the string table to a stream. It is serialized as a little + /// endian uint64 (the size of the table in bytes) followed by a sequence of + /// NULL-terminated strings, where the N-th string is the string with the ID N + /// in the StrTab map. + void serialize(raw_ostream &OS) const; + /// Serialize the string table to a vector. This allows users to do the actual + /// writing to file/memory/other. + /// The string with the ID == N should be the N-th element in the vector. + std::vector serialize() const; +}; + +} // end namespace remarks +} // end namespace llvm + +#endif /* LLVM_REMARKS_REMARK_STRING_TABLE_H */ Index: llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp =================================================================== --- llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -1359,6 +1359,29 @@ support::endian::write64le(Version.data(), remarks::Version); OutStreamer->EmitBinaryData(StringRef(Version.data(), Version.size())); + // Emit the string table in the section. + // Note: we need to use the streamer here to emit it in the section. We can't + // just use the serialize function with a raw_ostream because of the way + // MCStreamers work. + const remarks::StringTable &StrTab = RS->getStringTable(); + std::vector StrTabStrings = StrTab.serialize(); + uint64_t StrTabSize = StrTab.SerializedSize; + // Emit the total size of the string table (the size itself excluded): + // little-endian uint64_t. + // The total size is located after the version number. + std::array StrTabSizeBuf; + support::endian::write64le(StrTabSizeBuf.data(), StrTabSize); + OutStreamer->EmitBinaryData( + StringRef(StrTabSizeBuf.data(), StrTabSizeBuf.size())); + // Emit a list of null-terminated strings. + // Note: the order is important here: the ID used in the remarks corresponds + // to the position of the string in the section. + for (StringRef Str : StrTabStrings) { + OutStreamer->EmitBytes(Str); + // Explicitly emit a '\0'. + OutStreamer->EmitIntValue(/*Value=*/0, /*Size=*/1); + } + // Emit the null-terminated absolute path to the remark file. // The path is located at the offset 0x4 in the section. StringRef FilenameRef = RS->getFilename(); Index: llvm/lib/CodeGen/AsmPrinter/LLVMBuild.txt =================================================================== --- llvm/lib/CodeGen/AsmPrinter/LLVMBuild.txt +++ llvm/lib/CodeGen/AsmPrinter/LLVMBuild.txt @@ -18,4 +18,4 @@ type = Library name = AsmPrinter parent = Libraries -required_libraries = Analysis BinaryFormat CodeGen Core DebugInfoCodeView DebugInfoDWARF DebugInfoMSF MC MCParser Support Target +required_libraries = Analysis BinaryFormat CodeGen Core DebugInfoCodeView DebugInfoDWARF DebugInfoMSF MC MCParser Remarks Support Target Index: llvm/lib/IR/DiagnosticInfo.cpp =================================================================== --- llvm/lib/IR/DiagnosticInfo.cpp +++ llvm/lib/IR/DiagnosticInfo.cpp @@ -43,6 +43,8 @@ using namespace llvm; +cl::opt UseStringTable("remarks-yaml-string-table", cl::init(false)); + int llvm::getNextAvailablePluginDiagnosticKind() { static std::atomic PluginKindID(DK_FirstPluginKind); return ++PluginKindID; @@ -413,13 +415,28 @@ GlobalValue::dropLLVMManglingEscape(OptDiag->getFunction().getName()); StringRef PassName(OptDiag->PassName); - io.mapRequired("Pass", PassName); - io.mapRequired("Name", OptDiag->RemarkName); - if (!io.outputting() || DL.isValid()) - io.mapOptional("DebugLoc", DL); - io.mapRequired("Function", FN); - io.mapOptional("Hotness", OptDiag->Hotness); - io.mapOptional("Args", OptDiag->Args); + if (UseStringTable) { + remarks::StringTable &StrTab = + reinterpret_cast(io.getContext())->getStringTable(); + auto PassID = StrTab.add(PassName).first; + auto NameID = StrTab.add(OptDiag->RemarkName).first; + auto FunctionID = StrTab.add(FN).first; + io.mapRequired("Pass", PassID); + io.mapRequired("Name", NameID); + if (!io.outputting() || DL.isValid()) + io.mapOptional("DebugLoc", DL); + io.mapRequired("Function", FunctionID); + io.mapOptional("Hotness", OptDiag->Hotness); + io.mapOptional("Args", OptDiag->Args); + } else { + io.mapRequired("Pass", PassName); + io.mapRequired("Name", OptDiag->RemarkName); + if (!io.outputting() || DL.isValid()) + io.mapOptional("DebugLoc", DL); + io.mapRequired("Function", FN); + io.mapOptional("Hotness", OptDiag->Hotness); + io.mapOptional("Args", OptDiag->Args); + } } template <> struct MappingTraits { @@ -430,7 +447,15 @@ unsigned Line = DL.getLine(); unsigned Col = DL.getColumn(); - io.mapRequired("File", File); + if (UseStringTable) { + remarks::StringTable &StrTab = + reinterpret_cast(io.getContext())->getStringTable(); + auto FileID = StrTab.add(File).first; + io.mapRequired("File", FileID); + } else { + io.mapRequired("File", File); + } + io.mapRequired("Line", Line); io.mapRequired("Column", Col); } @@ -459,12 +484,18 @@ template <> struct MappingTraits { static void mapping(IO &io, DiagnosticInfoOptimizationBase::Argument &A) { assert(io.outputting() && "input not yet implemented"); - // Emit a string block scalar for multiline strings, to preserve newlines. - if (StringRef(A.Val).count('\n') > 1) { + + if (UseStringTable) { + remarks::StringTable &StrTab = + reinterpret_cast(io.getContext())->getStringTable(); + auto ValueID = StrTab.add(A.Val).first; + io.mapRequired(A.Key.data(), ValueID); + } else if (StringRef(A.Val).count('\n') > 1) { StringBlockVal S(A.Val); io.mapRequired(A.Key.data(), S); - } else + } else { io.mapRequired(A.Key.data(), A.Val); + } if (A.Loc.isValid()) io.mapOptional("DebugLoc", A.Loc); } Index: llvm/lib/IR/LLVMBuild.txt =================================================================== --- llvm/lib/IR/LLVMBuild.txt +++ llvm/lib/IR/LLVMBuild.txt @@ -18,4 +18,4 @@ type = Library name = Core parent = Libraries -required_libraries = BinaryFormat Support +required_libraries = BinaryFormat Remarks Support Index: llvm/lib/IR/RemarkStreamer.cpp =================================================================== --- llvm/lib/IR/RemarkStreamer.cpp +++ llvm/lib/IR/RemarkStreamer.cpp @@ -17,7 +17,7 @@ RemarkStreamer::RemarkStreamer(StringRef Filename, raw_ostream &OS) : Filename(Filename), OS(OS), - YAMLOutput(OS, reinterpret_cast(this)) { + YAMLOutput(OS, reinterpret_cast(this)), StrTab() { assert(!Filename.empty() && "This needs to be a real filename."); } Index: llvm/lib/Remarks/CMakeLists.txt =================================================================== --- llvm/lib/Remarks/CMakeLists.txt +++ llvm/lib/Remarks/CMakeLists.txt @@ -1,5 +1,6 @@ add_llvm_library(LLVMRemarks Remark.cpp RemarkParser.cpp + RemarkStringTable.cpp YAMLRemarkParser.cpp ) Index: llvm/lib/Remarks/RemarkParser.cpp =================================================================== --- llvm/lib/Remarks/RemarkParser.cpp +++ llvm/lib/Remarks/RemarkParser.cpp @@ -22,6 +22,9 @@ Parser::Parser(StringRef Buf) : Impl(llvm::make_unique(Buf)) {} +Parser::Parser(StringRef Buf, StringRef StrTabBuf) + : Impl(llvm::make_unique(Buf, StrTabBuf)) {} + Parser::~Parser() = default; static Expected getNextYAML(YAMLParserImpl &Impl) { @@ -56,6 +59,33 @@ llvm_unreachable("Get next called with an unknown parsing implementation."); } +ParsedStringTable::ParsedStringTable(StringRef InBuffer) : Buffer(InBuffer) { + while (!InBuffer.empty()) { + // Strings are separated by '\0' bytes. + std::pair Split = InBuffer.split('\0'); + // We only store the offset from the beginning of the buffer. + Offsets.push_back(Split.first.data() - Buffer.data()); + InBuffer = Split.second; + } +} + +Expected ParsedStringTable::operator[](size_t Index) { + if (Index >= Offsets.size()) + return createStringError( + std::make_error_code(std::errc::invalid_argument), + "String with index %u is out of bounds (size = %u).", Index, + Offsets.size()); + + size_t Offset = Offsets[Index]; + // If it's the last offset, we can't use the next offset to know the size of + // the string. + if (Index == Offsets.size() - 1) + return StringRef(Buffer.data() + Offset, Buffer.size() - Offset - 1); + + size_t NextOffset = Offsets[Index + 1]; + return StringRef(Buffer.data() + Offset, NextOffset - Offset - 1); +} + // Create wrappers for C Binding types (see CBindingWrapping.h). DEFINE_SIMPLE_CONVERSION_FUNCTIONS(remarks::Parser, LLVMRemarkParserRef) Index: llvm/lib/Remarks/RemarkStringTable.cpp =================================================================== --- /dev/null +++ llvm/lib/Remarks/RemarkStringTable.cpp @@ -0,0 +1,54 @@ +//===- RemarkStringTable.cpp ----------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Implementation of the Remark string table used at remark generation. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Remarks/RemarkStringTable.h" +#include "llvm/Support/EndianStream.h" +#include "llvm/Support/Error.h" +#include + +using namespace llvm; +using namespace llvm::remarks; + +std::pair StringTable::add(StringRef Str) { + size_t NextID = StrTab.size(); + auto KV = StrTab.insert({Str, NextID}); + // If it's a new string, add it to the final size. + if (KV.second) + SerializedSize += KV.first->first().size() + 1; // +1 for the '\0' + // Can be either NextID or the previous ID if the string is already there. + return {KV.first->second, KV.first->first()}; +} + +unsigned StringTable::get(StringRef Str) const { + auto Found = StrTab.find(Str); + assert(Found != StrTab.end() && "string not present in the string table"); + return Found->second; +} + +void StringTable::serialize(raw_ostream &OS) const { + // Emit the number of strings. + uint64_t StrTabSize = SerializedSize; + support::endian::write(OS, StrTabSize, support::little); + // Emit the sequence of strings. + for (StringRef Str : serialize()) { + OS << Str; + // Explicitly emit a '\0'. + OS.write('\0'); + } +} + +std::vector StringTable::serialize() const { + std::vector Strings{StrTab.size()}; + for (const auto &KV : StrTab) + Strings[KV.second] = KV.first(); + return Strings; +} Index: llvm/lib/Remarks/YAMLRemarkParser.h =================================================================== --- llvm/lib/Remarks/YAMLRemarkParser.h +++ llvm/lib/Remarks/YAMLRemarkParser.h @@ -17,6 +17,7 @@ #include "llvm/ADT/Optional.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Remarks/Remark.h" +#include "llvm/Remarks/RemarkParser.h" #include "llvm/Support/Error.h" #include "llvm/Support/SourceMgr.h" #include "llvm/Support/YAMLParser.h" @@ -38,7 +39,8 @@ raw_string_ostream ErrorStream; /// Temporary parsing buffer for the arguments. SmallVector TmpArgs; - + /// The string table used for parsing strings. + Optional StrTab; /// The state used by the parser to parse a remark entry. Invalidated with /// every call to `parseYAMLElement`. struct ParseState { @@ -57,10 +59,13 @@ /// not be containing any value. Optional State; - YAMLRemarkParser(StringRef Buf) + YAMLRemarkParser(StringRef Buf, Optional StrTabBuf = None) : SM(), Stream(Buf, SM), ErrorString(), ErrorStream(ErrorString), - TmpArgs() { + TmpArgs(), StrTab() { SM.setDiagHandler(YAMLRemarkParser::HandleDiagnostic, this); + + if (StrTabBuf) + StrTab.emplace(*StrTabBuf); } /// Parse a YAML element. @@ -122,8 +127,8 @@ /// Set to `true` if we had any errors during parsing. bool HasErrors = false; - YAMLParserImpl(StringRef Buf) - : ParserImpl{ParserImpl::Kind::YAML}, YAMLParser(Buf), + YAMLParserImpl(StringRef Buf, Optional StrTabBuf = None) + : ParserImpl{ParserImpl::Kind::YAML}, YAMLParser(Buf, StrTabBuf), YAMLIt(YAMLParser.Stream.begin()), HasErrors(false) {} static bool classof(const ParserImpl *PI) { Index: llvm/lib/Remarks/YAMLRemarkParser.cpp =================================================================== --- llvm/lib/Remarks/YAMLRemarkParser.cpp +++ llvm/lib/Remarks/YAMLRemarkParser.cpp @@ -34,7 +34,19 @@ auto *Value = dyn_cast(Node.getValue()); if (!Value) return make_error("expected a value of scalar type.", Node); - StringRef Tmp = Value->getRawValue(); + StringRef Tmp; + if (!StrTab) { + Tmp = Value->getRawValue(); + } else { + // If we have a string table, parse it as an unsigned. + unsigned StrID = 0; + if (Error E = parseUnsigned(StrID, Node)) + return E; + if (Expected Str = (*StrTab)[StrID]) + Tmp = *Str; + else + return Str.takeError(); + } if (Tmp.front() == '\'') Tmp = Tmp.drop_front(); Index: llvm/test/CodeGen/X86/remarks-section.ll =================================================================== --- llvm/test/CodeGen/X86/remarks-section.ll +++ llvm/test/CodeGen/X86/remarks-section.ll @@ -1,5 +1,6 @@ ; RUN: llc < %s -mtriple=x86_64-linux -remarks-section -pass-remarks-output=%/t.yaml | FileCheck -DPATH=%/t.yaml %s ; RUN: llc < %s -mtriple=x86_64-darwin -remarks-section -pass-remarks-output=%/t.yaml | FileCheck --check-prefix=CHECK-DARWIN -DPATH=%/t.yaml %s +; RUN: llc < %s -mtriple=x86_64-darwin -remarks-section -remarks-yaml-string-table -pass-remarks-output=%/t.yaml | FileCheck --check-prefix=CHECK-DARWIN-STRTAB -DPATH=%/t.yaml %s ; CHECK-LABEL: func1: @@ -11,6 +12,11 @@ ; The version: ; CHECK-NEXT: .byte 0x00, 0x00, 0x00, 0x00 ; CHECK-NEXT: .byte 0x00, 0x00, 0x00, 0x00 +; The string table size: +; CHECK-NEXT: .byte 0x00, 0x00, 0x00, 0x00 +; CHECK-NEXT: .byte 0x00, 0x00, 0x00, 0x00 +; The string table: +; EMPTY ; The remark file path: ; CHECK-NEXT: .ascii "[[PATH]]" ; Null-terminator: @@ -24,10 +30,50 @@ ; The version: ; CHECK-DARWIN-NEXT: .byte 0x00, 0x00, 0x00, 0x00 ; CHECK-DARWIN-NEXT: .byte 0x00, 0x00, 0x00, 0x00 +; The string table size: +; CHECK-DARWIN-NEXT: .byte 0x00, 0x00, 0x00, 0x00 +; CHECK-DARWIN-NEXT: .byte 0x00, 0x00, 0x00, 0x00 +; The string table: +; EMPTY ; The remark file path: ; CHECK-DARWIN-NEXT: .ascii "[[PATH]]" ; Null-terminator: ; CHECK-DARWIN-NEXT: .byte 0 + +; CHECK-DARWIN-STRTAB: .section __LLVM,__remarks,regular,debug +; The magic number: +; CHECK-DARWIN-STRTAB-NEXT: .ascii "REMARKS" +; Null-terminator: +; CHECK-DARWIN-STRTAB-NEXT: .byte 0 +; The version: +; CHECK-DARWIN-STRTAB-NEXT: .byte 0x00, 0x00, 0x00, 0x00 +; CHECK-DARWIN-STRTAB-NEXT: .byte 0x00, 0x00, 0x00, 0x00 +; The size of the string table: +; CHECK-DARWIN-STRTAB-NEXT: .byte 0x71, 0x00, 0x00, 0x00 +; CHECK-DARWIN-STRTAB-NEXT: .byte 0x00, 0x00, 0x00, 0x00 +; The string table: +; CHECK-DARWIN-STRTAB-NEXT: .ascii "prologepilog" +; CHECK-DARWIN-STRTAB-NEXT: .byte 0 +; CHECK-DARWIN-STRTAB-NEXT: .ascii "StackSize" +; CHECK-DARWIN-STRTAB-NEXT: .byte 0 +; CHECK-DARWIN-STRTAB-NEXT: .ascii "func1" +; CHECK-DARWIN-STRTAB-NEXT: .byte 0 +; CHECK-DARWIN-STRTAB-NEXT: .byte 48 +; CHECK-DARWIN-STRTAB-NEXT: .byte 0 +; CHECK-DARWIN-STRTAB-NEXT: .ascii " stack bytes in function" +; CHECK-DARWIN-STRTAB-NEXT: .byte 0 +; CHECK-DARWIN-STRTAB-NEXT: .ascii "asm-printer" +; CHECK-DARWIN-STRTAB-NEXT: .byte 0 +; CHECK-DARWIN-STRTAB-NEXT: .ascii "InstructionCount" +; CHECK-DARWIN-STRTAB-NEXT: .byte 0 +; CHECK-DARWIN-STRTAB-NEXT: .byte 49 +; CHECK-DARWIN-STRTAB-NEXT: .byte 0 +; CHECK-DARWIN-STRTAB-NEXT: .ascii " instructions in function" +; CHECK-DARWIN-STRTAB-NEXT: .byte 0 +; The remark file path: +; CHECK-DARWIN-STRTAB-NEXT: .ascii "[[PATH]]" +; Null-terminator: +; CHECK-DARWIN-STRTAB-NEXT: .byte 0 define void @func1() { ret void } Index: llvm/unittests/Remarks/CMakeLists.txt =================================================================== --- llvm/unittests/Remarks/CMakeLists.txt +++ llvm/unittests/Remarks/CMakeLists.txt @@ -4,5 +4,6 @@ ) add_llvm_unittest(RemarksTests + RemarksStrTabParsingTest.cpp YAMLRemarksParsingTest.cpp ) Index: llvm/unittests/Remarks/RemarksStrTabParsingTest.cpp =================================================================== --- /dev/null +++ llvm/unittests/Remarks/RemarksStrTabParsingTest.cpp @@ -0,0 +1,39 @@ +//===- unittest/Support/RemarksStrTabParsingTest.cpp - StrTab tests -------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/Remarks/Remark.h" +#include "llvm/Remarks/RemarkParser.h" +#include "gtest/gtest.h" + +using namespace llvm; + +TEST(RemarksStrTab, ParsingEmpty) { + StringRef Empty("", 0); + remarks::ParsedStringTable StrTab(Empty); + Expected Nothing = StrTab[0]; + EXPECT_FALSE(static_cast(Nothing)); + EXPECT_EQ(toString(Nothing.takeError()), + "String with index 0 is out of bounds (size = 0)."); +} + +TEST(RemarksStrTab, ParsingGood) { + StringRef Strings("str1\0str2\0str3\0str4", 20); + remarks::ParsedStringTable StrTab(Strings); + Expected Result = StrTab[0]; + EXPECT_TRUE(static_cast(Result)); + EXPECT_EQ(*Result, "str1"); + Result = StrTab[1]; + EXPECT_TRUE(static_cast(Result)); + EXPECT_EQ(*Result, "str2"); + Result = StrTab[2]; + EXPECT_TRUE(static_cast(Result)); + EXPECT_EQ(*Result, "str3"); + Result = StrTab[3]; + EXPECT_TRUE(static_cast(Result)); + EXPECT_EQ(*Result, "str4"); +} Index: llvm/unittests/Remarks/YAMLRemarksParsingTest.cpp =================================================================== --- llvm/unittests/Remarks/YAMLRemarksParsingTest.cpp +++ llvm/unittests/Remarks/YAMLRemarksParsingTest.cpp @@ -492,3 +492,105 @@ EXPECT_FALSE(LLVMRemarkParserHasError(Parser)); LLVMRemarkParserDispose(Parser); } + +TEST(YAMLRemarks, ContentsStrTab) { + StringRef Buf = "\n" + "--- !Missed\n" + "Pass: 0\n" + "Name: 1\n" + "DebugLoc: { File: 2, Line: 3, Column: 12 }\n" + "Function: 3\n" + "Hotness: 4\n" + "Args:\n" + " - Callee: 5\n" + " - String: 7\n" + " - Caller: 3\n" + " DebugLoc: { File: 2, Line: 2, Column: 0 }\n" + " - String: 8\n" + "\n"; + + StringRef StrTabBuf = + StringRef("inline\0NoDefinition\0file.c\0foo\0Callee\0bar\0String\0 " + "will not be inlined into \0 because its definition is " + "unavailable", + 115); + + remarks::Parser Parser(Buf, StrTabBuf); + Expected RemarkOrErr = Parser.getNext(); + EXPECT_FALSE(errorToBool(RemarkOrErr.takeError())); + EXPECT_TRUE(*RemarkOrErr != nullptr); + + const remarks::Remark &Remark = **RemarkOrErr; + EXPECT_EQ(Remark.RemarkType, remarks::Type::Missed); + EXPECT_EQ(checkStr(Remark.PassName, 6), "inline"); + EXPECT_EQ(checkStr(Remark.RemarkName, 12), "NoDefinition"); + EXPECT_EQ(checkStr(Remark.FunctionName, 3), "foo"); + EXPECT_TRUE(Remark.Loc); + const remarks::RemarkLocation &RL = *Remark.Loc; + EXPECT_EQ(checkStr(RL.SourceFilePath, 6), "file.c"); + EXPECT_EQ(RL.SourceLine, 3U); + EXPECT_EQ(RL.SourceColumn, 12U); + EXPECT_TRUE(Remark.Hotness); + EXPECT_EQ(*Remark.Hotness, 4U); + EXPECT_EQ(Remark.Args.size(), 4U); + + unsigned ArgID = 0; + for (const remarks::Argument &Arg : Remark.Args) { + switch (ArgID) { + case 0: + EXPECT_EQ(checkStr(Arg.Key, 6), "Callee"); + EXPECT_EQ(checkStr(Arg.Val, 3), "bar"); + EXPECT_FALSE(Arg.Loc); + break; + case 1: + EXPECT_EQ(checkStr(Arg.Key, 6), "String"); + EXPECT_EQ(checkStr(Arg.Val, 26), " will not be inlined into "); + EXPECT_FALSE(Arg.Loc); + break; + case 2: { + EXPECT_EQ(checkStr(Arg.Key, 6), "Caller"); + EXPECT_EQ(checkStr(Arg.Val, 3), "foo"); + EXPECT_TRUE(Arg.Loc); + const remarks::RemarkLocation &RL = *Arg.Loc; + EXPECT_EQ(checkStr(RL.SourceFilePath, 6), "file.c"); + EXPECT_EQ(RL.SourceLine, 2U); + EXPECT_EQ(RL.SourceColumn, 0U); + break; + } + case 3: + EXPECT_EQ(checkStr(Arg.Key, 6), "String"); + EXPECT_EQ(checkStr(Arg.Val, 38), + " because its definition is unavailable"); + EXPECT_FALSE(Arg.Loc); + break; + default: + break; + } + ++ArgID; + } + + RemarkOrErr = Parser.getNext(); + EXPECT_FALSE(errorToBool(RemarkOrErr.takeError())); + EXPECT_EQ(*RemarkOrErr, nullptr); +} + +TEST(YAMLRemarks, ParsingBadStringTableIndex) { + StringRef Buf = "\n" + "--- !Missed\n" + "Pass: 50\n" + "\n"; + + StringRef StrTabBuf = StringRef("inline"); + + remarks::Parser Parser(Buf, StrTabBuf); + Expected Remark = Parser.getNext(); + EXPECT_FALSE(Remark); // Expect an error here. + + std::string ErrorStr; + raw_string_ostream Stream(ErrorStr); + handleAllErrors(Remark.takeError(), + [&](const ErrorInfoBase &EIB) { EIB.log(Stream); }); + EXPECT_TRUE( + StringRef(Stream.str()) + .contains("String with index 50 is out of bounds (size = 1).")); +}