Index: llvm/test/tools/obj2yaml/elf-output-indentation.yaml =================================================================== --- /dev/null +++ llvm/test/tools/obj2yaml/elf-output-indentation.yaml @@ -0,0 +1,304 @@ +## obj2yaml tries to optimize indentations between keys and +## values to make the output nicer. +## In this test we demonstrate the output produced. + +# RUN: yaml2obj %s -o %t +# RUN: obj2yaml %t | FileCheck %s --strict-whitespace --match-full-lines + +# CHECK:--- !ELF +# CHECK-NEXT:FileHeader: +# CHECK-NEXT: Class: ELFCLASS64 +# CHECK-NEXT: Data: ELFDATA2LSB +# CHECK-NEXT: Type: ET_REL +# CHECK-NEXT: Machine: EM_X86_64 +# CHECK-NEXT:Sections: +# CHECK-NEXT: - Name: .dynamic +# CHECK-NEXT: Type: SHT_DYNAMIC +# CHECK-NEXT: Flags: [ SHF_ALLOC ] +# CHECK-NEXT: Address: 0x0000000000000008 +# CHECK-NEXT: AddressAlign: 0x0000000000000008 +# CHECK-NEXT: EntSize: 0x0000000000000010 +# CHECK-NEXT: Entries: +# CHECK-NEXT: - Tag: DT_STRSZ +# CHECK-NEXT: Value: 0x0000000000000001 +# CHECK-NEXT: - Tag: DT_NULL +# CHECK-NEXT: Value: 0x0000000000000000 +# CHECK-NEXT: - Name: .symtab_shndx +# CHECK-NEXT: Type: SHT_SYMTAB_SHNDX +# CHECK-NEXT: Link: .symtab +# CHECK-NEXT: EntSize: 0x0000000000000004 +# CHECK-NEXT: Entries: [ 8, 9, 10, 11, 8, 9, 10, 11, 9, 10, 11, 8, 8, 9, +# CHECK-NEXT: 10, 11, 8 ] +# CHECK-NEXT: - Name: .text +# CHECK-NEXT: Type: SHT_PROGBITS +# CHECK-NEXT: - Name: .data +# CHECK-NEXT: Type: SHT_PROGBITS +# CHECK-NEXT: AddressAlign: 0x0000000000000001 +# CHECK-NEXT: - Name: .rela.text +# CHECK-NEXT: Type: SHT_RELA +# CHECK-NEXT: Link: .symtab +# CHECK-NEXT: EntSize: 0x0000000000000018 +# CHECK-NEXT: Info: .text +# CHECK-NEXT: Relocations: +# CHECK-NEXT: - Offset: 0x0000000000000000 +# CHECK-NEXT: Type: R_X86_64_PC32 +# CHECK-NEXT: - Offset: 0x0000000000000001 +# CHECK-NEXT: Type: R_X86_64_PC64 +# CHECK-NEXT: - Name: .rela.text2 +# CHECK-NEXT: Type: SHT_RELA +# CHECK-NEXT: Link: .symtab +# CHECK-NEXT: EntSize: 0x0000000000000018 +# CHECK-NEXT: - Name: .group +# CHECK-NEXT: Type: SHT_GROUP +# CHECK-NEXT: Link: .symtab +# CHECK-NEXT: EntSize: 0x0000000000000004 +# CHECK-NEXT: Info: foo +# CHECK-NEXT: Members: +# CHECK-NEXT: - SectionOrType: GRP_COMDAT +# CHECK-NEXT: - SectionOrType: .text.foo +# CHECK-NEXT: - Name: .text.foo +# CHECK-NEXT: Type: SHT_PROGBITS +# CHECK-NEXT: - Name: '.group [1]' +# CHECK-NEXT: Type: SHT_GROUP +# CHECK-NEXT: Link: .symtab +# CHECK-NEXT: EntSize: 0x0000000000000004 +# CHECK-NEXT: Info: 'foo [1]' +# CHECK-NEXT: Members: +# CHECK-NEXT: - SectionOrType: GRP_COMDAT +# CHECK-NEXT: - SectionOrType: '.text.foo [1]' +# CHECK-NEXT: - Name: '.text.foo [1]' +# CHECK-NEXT: Type: SHT_PROGBITS +# CHECK-NEXT: - Name: .gnu.hash +# CHECK-NEXT: Type: SHT_GNU_HASH +# CHECK-NEXT: Flags: [ SHF_ALLOC ] +# CHECK-NEXT: Header: +# CHECK-NEXT: SymNdx: 0x00000001 +# CHECK-NEXT: Shift2: 0x00000002 +# CHECK-NEXT: BloomFilter: [ 0x0000000000000003, 0x0000000000000004, 0x0000000000000005, +# CHECK-NEXT: 0x0000000000000006, 0x0000000000000007, 0x0000000000000004, +# CHECK-NEXT: 0x0000000000000005, 0x0000000000000006, 0x0000000000000007 ] +# CHECK-NEXT: HashBuckets: [ 0x00000005, 0x00000006, 0x00000007 ] +# CHECK-NEXT: HashValues: [ 0x00000008, 0x00000009, 0x0000000A, 0x0000000B, +# CHECK-NEXT: 0x00000008, 0x00000009, 0x0000000A, 0x0000000B, +# CHECK-NEXT: 0x00000009, 0x0000000A, 0x0000000B, 0x00000008, +# CHECK-NEXT: 0x00000009, 0x0000000A, 0x0000000B ] +# CHECK-NEXT: - Name: .gnu.version_d +# CHECK-NEXT: Type: SHT_GNU_verdef +# CHECK-NEXT: Flags: [ SHF_ALLOC ] +# CHECK-NEXT: Address: 0x0000000000000230 +# CHECK-NEXT: Link: .dynstr +# CHECK-NEXT: AddressAlign: 0x0000000000000004 +# CHECK-NEXT: Info: 0x0000000000000004 +# CHECK-NEXT: Entries: +# CHECK-NEXT: - Version: 1 +# CHECK-NEXT: Flags: 1 +# CHECK-NEXT: VersionNdx: 1 +# CHECK-NEXT: Hash: 170240160 +# CHECK-NEXT: Names: +# CHECK-NEXT: - dso.so.0 +# CHECK-NEXT: - Version: 1 +# CHECK-NEXT: Flags: 2 +# CHECK-NEXT: VersionNdx: 2 +# CHECK-NEXT: Hash: 108387921 +# CHECK-NEXT: Names: +# CHECK-NEXT: - VERSION_1 +# CHECK-NEXT: - Version: 1 +# CHECK-NEXT: Flags: 3 +# CHECK-NEXT: VersionNdx: 3 +# CHECK-NEXT: Hash: 108387922 +# CHECK-NEXT: Names: +# CHECK-NEXT: - VERSION_2 +# CHECK-NEXT: - VERSION_3 +# CHECK-NEXT: - Name: .gnu.version_r +# CHECK-NEXT: Type: SHT_GNU_verneed +# CHECK-NEXT: Flags: [ SHF_ALLOC ] +# CHECK-NEXT: Link: .dynstr +# CHECK-NEXT: Info: 0x0000000000000001 +# CHECK-NEXT: Dependencies: +# CHECK-NEXT: - Version: 1 +# CHECK-NEXT: File: dso.so.0 +# CHECK-NEXT: Entries: +# CHECK-NEXT: - Name: v1 +# CHECK-NEXT: Hash: 1937 +# CHECK-NEXT: Flags: 0 +# CHECK-NEXT: Other: 3 +# CHECK-NEXT: - Name: v2 +# CHECK-NEXT: Hash: 1938 +# CHECK-NEXT: Flags: 0 +# CHECK-NEXT: Other: 4 +# CHECK-NEXT:Symbols: +# CHECK-NEXT: - Name: foo +# CHECK-NEXT: Section: .text.foo +# CHECK-NEXT: - Name: 'foo [1]' +# CHECK-NEXT: Section: '.text.foo [1]' +# CHECK-NEXT: - Name: zed1 +# CHECK-NEXT: - Name: zed2 +# CHECK-NEXT: - Name: zed3 +# CHECK-NEXT: - Name: zed4 +# CHECK-NEXT: - Name: zed5 +# CHECK-NEXT: - Name: zed6 +# CHECK-NEXT: Binding: STB_GLOBAL +# CHECK-NEXT: - Name: zed7 +# CHECK-NEXT: - Name: zed8 +# CHECK-NEXT: - Name: zed9 +# CHECK-NEXT: - Name: zed10 +# CHECK-NEXT: - Name: zed11 +# CHECK-NEXT: - Name: zed12 +# CHECK-NEXT: - Name: zed13 +# CHECK-NEXT: Section: .text.foo +# CHECK-NEXT: Binding: STB_GLOBAL +# CHECK-NEXT: - Name: zed14 +# CHECK-NEXT:DynamicSymbols: +# CHECK-NEXT: - Name: dynamic +# CHECK-NEXT: Binding: STB_GLOBAL +# CHECK-NEXT: Value: 0x0000000012345678 +# CHECK-NEXT: - Name: both + +## Use a YAML document with arbitrary fields and values to +## show how the indentation is affected. + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_REL + Machine: EM_X86_64 +Sections: + - Name: .dynamic + Type: SHT_DYNAMIC + Flags: [ SHF_ALLOC ] + Address: 0x0000000000000008 + AddressAlign: 0x0000000000000008 + EntSize: 0x0000000000000010 + Entries: + - Tag: DT_STRSZ + Value: 0x0000000000000001 + - Tag: DT_NULL + Value: 0x0000000000000000 + - Name: .symtab_shndx + Type: SHT_SYMTAB_SHNDX + Entries: [0x8, 0x9, 0xA, 0xB, 0x8, 0x9, 0xA, 0xB, 0x9, 0xA, 0xB, 0x8, + 0x8, 0x9, 0xA, 0xB, 0x8] + Link: .symtab + - Name: .text + Type: SHT_PROGBITS + - Name: .data + Type: SHT_PROGBITS + AddressAlign: 0x1 + - Name: .rela.text + Type: SHT_RELA + Link: .symtab + EntSize: 0x0000000000000018 + Info: .text + Relocations: + - Offset: 0x0000000000000000 + Type: R_X86_64_PC32 + - Offset: 0x0000000000000001 + Type: R_X86_64_PC64 + - Name: .rela.text2 + Type: SHT_RELA + Link: .symtab + EntSize: 0x0000000000000018 + Relocations: [] + - Name: .group + Type: SHT_GROUP + Info: foo + Members: + - SectionOrType: GRP_COMDAT + - SectionOrType: .text.foo + - Name: .text.foo + Type: SHT_PROGBITS + - Name: '.group [1]' + Type: SHT_GROUP + Info: 'foo [1]' + Members: + - SectionOrType: GRP_COMDAT + - SectionOrType: '.text.foo [1]' + - Name: '.text.foo [1]' + Type: SHT_PROGBITS + - Name: .gnu.hash + Type: SHT_GNU_HASH + Flags: [ SHF_ALLOC ] + Header: + SymNdx: 0x1 + Shift2: 0x2 + BloomFilter: [0x3, 0x4, 0x5, 0x6, 0x7, 0x4, 0x5, 0x6, 0x7] + HashBuckets: [0x5, 0x6, 0x7] + HashValues: [0x8, 0x9, 0xA, 0xB, 0x8, 0x9, 0xA, 0xB, 0x9, 0xA, 0xB, 0x8, 0x9, 0xA, 0xB] + Link: 0x0 + - Name: .gnu.version_d + Type: SHT_GNU_verdef + Flags: [ SHF_ALLOC ] + Address: 0x0000000000000230 + Link: .dynstr + AddressAlign: 0x0000000000000004 + Info: 0x0000000000000004 + Entries: + - Version: 1 + Flags: 1 + VersionNdx: 1 + Hash: 170240160 + Names: + - dso.so.0 + - Version: 1 + Flags: 2 + VersionNdx: 2 + Hash: 108387921 + Names: + - VERSION_1 + - Version: 1 + Flags: 3 + VersionNdx: 3 + Hash: 108387922 + Names: + - VERSION_2 + - VERSION_3 + - Name: .gnu.version_r + Type: SHT_GNU_verneed + Flags: [ SHF_ALLOC ] + Link: .dynstr + Info: 0x0000000000000001 + Dependencies: + - Version: 1 + File: dso.so.0 + Entries: + - Name: v1 + Hash: 1937 + Flags: 0 + Other: 3 + - Name: v2 + Hash: 1938 + Flags: 0 + Other: 4 +Symbols: + - Name: foo + Section: .text.foo + - Name: 'foo [1]' + Section: '.text.foo [1]' + - Name: zed1 + - Name: zed2 + - Name: zed3 + - Name: zed4 + - Name: zed5 + - Name: zed6 + Binding: STB_GLOBAL + - Name: zed7 + - Name: zed8 + - Name: zed9 + - Name: zed10 + - Name: zed11 + - Name: zed12 + - Name: zed13 + Binding: STB_GLOBAL + Section: .text.foo + - Name: zed14 +ProgramHeaders: + - Type: PT_LOAD + VAddr: 0x100 + Sections: + - Section: .text +DynamicSymbols: + - Name: dynamic + Binding: STB_GLOBAL + Value: 0x0000000012345678 + - Name: both Index: llvm/tools/obj2yaml/obj2yaml.cpp =================================================================== --- llvm/tools/obj2yaml/obj2yaml.cpp +++ llvm/tools/obj2yaml/obj2yaml.cpp @@ -17,6 +17,159 @@ using namespace llvm; using namespace llvm::object; +// This is used to post-process an YAML output which usually contains a +// lot of excessive indentation spaces between keys and values. Here we +// remove them to make an output nicer. +std::string postProcess(StringRef In) { + struct LineInfo { + // The length of YAML document key that was found on the line, or + // 0 when no key was found. + size_t KeyLen; + // The index of ":" character. + size_t Colon; + // The original line. + StringRef Line; + + // A YAML string that ends with a comma is incomplete. The method + // returns if a string is complete. + bool IsComplete() const { return !Line.rtrim(" ").endswith(","); } + }; + + // The state is used when scanning lines. + struct ScanState { + // Maximum key size found while scanning a current group of lines. + size_t MaxKey = 0; + size_t Level = size_t(-1); + std::vector Lines; + } State; + + // We are going to scan and collect lines one by one. We combine them + // into a group, then we reformat each line to align values and the print + // them. This helper function is used to flush the group, i.e. to format lines + // it contains and print them. + std::string Out; + auto FlushState = [&]() { + // Flag is set when the previously processed line was incomplete. + bool PrevComplete = true; + // Holds the number of spaces removed from the previous line. + size_t PrevAdjustedBy = 0; + + for (const LineInfo &LI : State.Lines) { + // When a line ends with a comma it means it is incomplete and was + // wrapped. Sometimes we have multiple incomplete lines in a row: + // + // BloomFilter: [ 0x0000000000000003, 0x0000000000000004, + // 0x0000000000000005, 0x0000000000000006, + // 0x0000000000000007 ] + // + // In this case we proccess the first line as usual, because it contains a + // key. We accumulate all other lines describing a sequence without + // any additional operations. All what need to do with them is to trim + // spaces at the begining. + if (!PrevComplete) { + Out += LI.Line.drop_front(PrevAdjustedBy).str() + '\n'; + PrevComplete = LI.IsComplete(); + continue; + } + + // Build the re-formatted key-value line. + std::string NewLine; + // Take the left part (with a key) of the original string up to colon. + // E.g. " Class:". + NewLine += LI.Line.take_front(LI.Colon + 1).str(); + // Add spaces. We calculate the number of spaces so that values are + // aligned properly and there are no excessive spaces. + NewLine += std::string(State.MaxKey - LI.KeyLen + 1, ' '); + // Add the part with a value. + NewLine += LI.Line.drop_front(LI.Colon + 1).ltrim(' '); + + Out += NewLine + '\n'; + PrevComplete = LI.IsComplete(); + PrevAdjustedBy = LI.Line.size() - NewLine.size(); + } + State = {}; + }; + + SmallVector Lines; + In.split(Lines, '\n'); + + // Here we will take YAML description lines one by one, scan them to find + // key-value pair, group them, then format and print. + for (StringRef Line : Lines) { + // If the previous line was incomplete, we can either met its ending, + // or another incomplete line. In both cases we know there is no key-value + // pair and should skip an additional processing. + if (!State.Lines.empty() && !State.Lines.back().IsComplete()) { + State.Lines.push_back({0, 0, Line}); + continue; + } + + // Lookup for a colon. When there is no colon character, it is not a + // key-value pair line. Usually it is either an end of a YAML description + // marked with "...", or an entry: + // + // Names: + // - dso.so.0 + // + // When there is a colon which is the last symbol on the line, it is usually + // a start of a new sequence, e.g. something like this: + // + // AddressAlign: 0x12 + // Info: 0x34 + // Entries: + // - Version: 1 + // + // Here, "Entries:" works like a "end of group" mark. + // + // In all cases it means we want to flush current state and print the line + // as is. + size_t Colon = Line.find(':'); + if (Colon == StringRef::npos || Colon == Line.size() - 1) { + FlushState(); + Out += Line.str() + '\n'; + continue; + } + + // We want to flush our state if the first character is a dash. + // it is used for following cases: + // + // - Name: foo + // Section: bar + // - Name: zed1 + // + // The dash that is on the line with zed1 is a mark of a new + // lines group, we need to format and print lines we collected. + if (Line.ltrim(" ").front() == '-') + FlushState(); + + // We can estimate the level of a property in the tree by counting the + // number of columns on the left. It is used for places like: + // + // Entries: + // ... + // - Name: Vn + // Hash: 1234 + // Flags: 1 + // Other: 2 + // Symbols: + // + // In this case Symbols could be a property of an entry, but it has no + // indentations on the left. Hence we know that it is a higher level key and + // we should flush the current scan state. + StringRef Key = Line.take_front(Colon).ltrim("- "); + size_t Level = Colon - Key.size(); + if (State.Level != size_t(-1) && State.Level != Level) + FlushState(); + State.Level = Level; + + // Remember the maximum key size and the line processed. + if (Key.size() > State.MaxKey) + State.MaxKey = Key.size(); + State.Lines.push_back({Key.size(), Colon, Line}); + } + return Out; +} + static Error dumpObject(const ObjectFile &Obj) { if (Obj.isCOFF()) return errorCodeToError(coff2yaml(outs(), cast(Obj))); @@ -24,8 +177,15 @@ if (Obj.isXCOFF()) return errorCodeToError(xcoff2yaml(outs(), cast(Obj))); - if (Obj.isELF()) - return elf2yaml(outs(), Obj); + if (Obj.isELF()) { + SmallString<0> Storage; + raw_svector_ostream OS(Storage); + if (Error Err = elf2yaml(OS, Obj)) + return Err; + std::string Formatted = postProcess(Storage); + outs().write(Formatted.data(), Formatted.size()); + return Error::success(); + } if (Obj.isWasm()) return errorCodeToError(wasm2yaml(outs(), cast(Obj)));