diff --git a/llvm/include/llvm/Object/Wasm.h b/llvm/include/llvm/Object/Wasm.h --- a/llvm/include/llvm/Object/Wasm.h +++ b/llvm/include/llvm/Object/Wasm.h @@ -104,12 +104,14 @@ struct WasmSection { WasmSection() = default; - uint32_t Type = 0; // Section type (See below) - uint32_t Offset = 0; // Offset with in the file + uint32_t Type = 0; + uint32_t Offset = 0; // Offset within the file StringRef Name; // Section name (User-defined sections only) uint32_t Comdat = UINT32_MAX; // From the "comdat info" section - ArrayRef Content; // Section content - std::vector Relocations; // Relocations for this section + ArrayRef Content; + std::vector Relocations; + // Length of the LEB encoding of the section header's size field + std::optional HeaderSecSizeEncodingLen; }; struct WasmSegment { diff --git a/llvm/include/llvm/ObjectYAML/WasmYAML.h b/llvm/include/llvm/ObjectYAML/WasmYAML.h --- a/llvm/include/llvm/ObjectYAML/WasmYAML.h +++ b/llvm/include/llvm/ObjectYAML/WasmYAML.h @@ -189,6 +189,7 @@ SectionType Type; std::vector Relocations; + std::optional HeaderSecSizeEncodingLen; }; struct CustomSection : Section { diff --git a/llvm/lib/ObjCopy/wasm/WasmObject.h b/llvm/lib/ObjCopy/wasm/WasmObject.h --- a/llvm/lib/ObjCopy/wasm/WasmObject.h +++ b/llvm/lib/ObjCopy/wasm/WasmObject.h @@ -23,6 +23,7 @@ // For now, each section is only an opaque binary blob with no distinction // between custom and known sections. 
uint8_t SectionType; + std::optional HeaderSecSizeEncodingLen; StringRef Name; ArrayRef Contents; }; diff --git a/llvm/lib/ObjCopy/wasm/WasmReader.cpp b/llvm/lib/ObjCopy/wasm/WasmReader.cpp --- a/llvm/lib/ObjCopy/wasm/WasmReader.cpp +++ b/llvm/lib/ObjCopy/wasm/WasmReader.cpp @@ -22,8 +22,8 @@ Obj->Sections.reserve(WasmObj.getNumSections()); for (const SectionRef &Sec : WasmObj.sections()) { const WasmSection &WS = WasmObj.getWasmSection(Sec); - Obj->Sections.push_back( - {static_cast(WS.Type), WS.Name, WS.Content}); + Obj->Sections.push_back({static_cast(WS.Type), + WS.HeaderSecSizeEncodingLen, WS.Name, WS.Content}); // Give known sections standard names to allow them to be selected. (Custom // sections already have their names filled in by the parser). Section &ReaderSec = Obj->Sections.back(); diff --git a/llvm/lib/ObjCopy/wasm/WasmWriter.cpp b/llvm/lib/ObjCopy/wasm/WasmWriter.cpp --- a/llvm/lib/ObjCopy/wasm/WasmWriter.cpp +++ b/llvm/lib/ObjCopy/wasm/WasmWriter.cpp @@ -29,16 +29,19 @@ SectionSize = S.Contents.size(); if (HasName) SectionSize += getULEB128Size(S.Name.size()) + S.Name.size(); - // Pad the LEB value out to 5 bytes to make it a predictable size, and - // match the behavior of clang. - encodeULEB128(SectionSize, OS, 5); + // If we read this section from an object file, use its original size for the + // padding of the LEB value to avoid changing the file size. Otherwise, pad + // out to 5 bytes to make it predictable, and match the behavior of clang. + unsigned HeaderSecSizeEncodingLen = + S.HeaderSecSizeEncodingLen ? *S.HeaderSecSizeEncodingLen : 5; + encodeULEB128(SectionSize, OS, HeaderSecSizeEncodingLen); if (HasName) { encodeULEB128(S.Name.size(), OS); OS << S.Name; } // Total section size is the content size plus 1 for the section type and - // 5 for the LEB-encoded size. - SectionSize = SectionSize + 1 + 5; + // the LEB-encoded size. 
+ SectionSize = SectionSize + 1 + HeaderSecSizeEncodingLen; return Header; } diff --git a/llvm/lib/Object/WasmObjectFile.cpp b/llvm/lib/Object/WasmObjectFile.cpp --- a/llvm/lib/Object/WasmObjectFile.cpp +++ b/llvm/lib/Object/WasmObjectFile.cpp @@ -268,7 +268,11 @@ Section.Offset = Ctx.Ptr - Ctx.Start; Section.Type = readUint8(Ctx); LLVM_DEBUG(dbgs() << "readSection type=" << Section.Type << "\n"); + // When reading the section's size, store the size of the LEB used to encode + // it. This allows objcopy/strip to reproduce the binary identically. + const uint8_t *PreSizePtr = Ctx.Ptr; uint32_t Size = readVaruint32(Ctx); + Section.HeaderSecSizeEncodingLen = Ctx.Ptr - PreSizePtr; if (Size == 0) return make_error("zero length section", object_error::parse_failed); diff --git a/llvm/lib/ObjectYAML/WasmEmitter.cpp b/llvm/lib/ObjectYAML/WasmEmitter.cpp --- a/llvm/lib/ObjectYAML/WasmEmitter.cpp +++ b/llvm/lib/ObjectYAML/WasmEmitter.cpp @@ -646,8 +646,18 @@ StringStream.flush(); + unsigned HeaderSecSizeEncodingLen = + Sec->HeaderSecSizeEncodingLen ? 
*Sec->HeaderSecSizeEncodingLen : 5; + unsigned RequiredLen = getULEB128Size(OutString.size()); + // Wasm spec does not allow LEBs larger than 5 bytes + assert(RequiredLen <= 5); + if (HeaderSecSizeEncodingLen < RequiredLen) { + reportError("section header length can't be encoded in a LEB of size " + + Twine(HeaderSecSizeEncodingLen)); + return false; + } // Write the section size followed by the content - encodeULEB128(OutString.size(), OS); + encodeULEB128(OutString.size(), OS, HeaderSecSizeEncodingLen); OS << OutString; } diff --git a/llvm/lib/ObjectYAML/WasmYAML.cpp b/llvm/lib/ObjectYAML/WasmYAML.cpp --- a/llvm/lib/ObjectYAML/WasmYAML.cpp +++ b/llvm/lib/ObjectYAML/WasmYAML.cpp @@ -45,6 +45,7 @@ static void commonSectionMapping(IO &IO, WasmYAML::Section &Section) { IO.mapRequired("Type", Section.Type); IO.mapOptional("Relocations", Section.Relocations); + IO.mapOptional("HeaderSecSizeEncodingLen", Section.HeaderSecSizeEncodingLen); } static void sectionMapping(IO &IO, WasmYAML::DylinkSection &Section) { diff --git a/llvm/test/ObjectYAML/wasm/section_header_size.yaml b/llvm/test/ObjectYAML/wasm/section_header_size.yaml new file mode 100644 --- /dev/null +++ b/llvm/test/ObjectYAML/wasm/section_header_size.yaml @@ -0,0 +1,91 @@ +## Test that obj2yaml output includes the section header size encoding length +## only when it is neither the minimal length nor padded to 5 bytes. 
+# RUN: yaml2obj --docnum=1 %s | obj2yaml | FileCheck %s + +--- !WASM +FileHeader: + Version: 0x1 +Sections: + - Type: TYPE + HeaderSecSizeEncodingLen: 3 + Signatures: + - Index: 0 + ParamTypes: + - I32 + - I32 + ReturnTypes: + - I32 + - Type: FUNCTION + HeaderSecSizeEncodingLen: 4 + FunctionTypes: [ 0 ] + - Type: MEMORY + HeaderSecSizeEncodingLen: 1 + Memories: + - Flags: [ HAS_MAX ] + Minimum: 0x100 + Maximum: 0x100 + - Type: EXPORT + HeaderSecSizeEncodingLen: 5 + Exports: + - Name: add + Kind: FUNCTION + Index: 0 + - Type: CODE + HeaderSecSizeEncodingLen: 2 + Functions: + - Index: 0 + Locals: [] + Body: 200020016A0B +... +# CHECK: --- !WASM +# CHECK-NEXT: FileHeader: +# CHECK-NEXT: Version: 0x1 +# CHECK-NEXT: Sections: +# CHECK-NEXT: - Type: TYPE +# CHECK-NEXT: HeaderSecSizeEncodingLen: 3 +# CHECK-NEXT: Signatures: +# CHECK-NEXT: - Index: 0 +# CHECK-NEXT: ParamTypes: +# CHECK-NEXT: - I32 +# CHECK-NEXT: - I32 +# CHECK-NEXT: ReturnTypes: +# CHECK-NEXT: - I32 +# CHECK-NEXT: - Type: FUNCTION +# CHECK-NEXT: HeaderSecSizeEncodingLen: 4 +# CHECK-NEXT: FunctionTypes: [ 0 ] +# CHECK-NEXT: - Type: MEMORY +# CHECK-NEXT: Memories: +# CHECK-NEXT: - Flags: [ HAS_MAX ] +# CHECK-NEXT: Minimum: 0x100 +# CHECK-NEXT: Maximum: 0x100 +# CHECK-NEXT: - Type: EXPORT +# CHECK-NEXT: Exports: +# CHECK-NEXT: - Name: add +# CHECK-NEXT: Kind: FUNCTION +# CHECK-NEXT: Index: 0 +# CHECK-NEXT: - Type: CODE +# CHECK-NEXT: HeaderSecSizeEncodingLen: 2 +# CHECK-NEXT: Functions: +# CHECK-NEXT: - Index: 0 +# CHECK-NEXT: Locals: [] +# CHECK-NEXT: Body: 200020016A0B + +## Test if we correctly error out if the provided section header size is less +## than the size required. 
+# RUN: not yaml2obj --docnum=2 %s -o /dev/null 2>&1 | FileCheck %s --check-prefix=INVALID +# INVALID: yaml2obj: error: section header length can't be encoded in a LEB of size 0 + +--- !WASM +FileHeader: + Version: 0x1 +Sections: + - Type: TYPE + HeaderSecSizeEncodingLen: 0 + Signatures: + - Index: 0 + ParamTypes: + - I32 + - I32 + ReturnTypes: + - I32 +... diff --git a/llvm/test/tools/llvm-objcopy/wasm/section-header-size.test b/llvm/test/tools/llvm-objcopy/wasm/section-header-size.test new file mode 100644 --- /dev/null +++ b/llvm/test/tools/llvm-objcopy/wasm/section-header-size.test @@ -0,0 +1,41 @@ +## Test that objcopy generates section headers that are identical to those from +## the input binary, including the encoded size of the LEB that represents the +## section size. + +# RUN: yaml2obj %s -o %t.wasm +# RUN: llvm-objcopy %t.wasm %t.wasm.copy +# RUN: diff %t.wasm %t.wasm.copy + +--- !WASM +FileHeader: + Version: 0x1 +Sections: + - Type: TYPE + HeaderSecSizeEncodingLen: 3 + Signatures: + - Index: 0 + ParamTypes: + - I32 + - I32 + ReturnTypes: + - I32 + - Type: FUNCTION + HeaderSecSizeEncodingLen: 4 + FunctionTypes: [ 0 ] + - Type: MEMORY + HeaderSecSizeEncodingLen: 1 + Memories: + - Flags: [ HAS_MAX ] + Minimum: 0x100 + Maximum: 0x100 + - Type: EXPORT + HeaderSecSizeEncodingLen: 5 + Exports: + - Name: add + Kind: FUNCTION + Index: 0 + - Type: CODE + Functions: + - Index: 0 + Locals: [] + Body: 200020016A0B diff --git a/llvm/tools/obj2yaml/wasm2yaml.cpp b/llvm/tools/obj2yaml/wasm2yaml.cpp --- a/llvm/tools/obj2yaml/wasm2yaml.cpp +++ b/llvm/tools/obj2yaml/wasm2yaml.cpp @@ -10,6 +10,7 @@ #include "llvm/Object/COFF.h" #include "llvm/ObjectYAML/WasmYAML.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/LEB128.h" #include "llvm/Support/YAMLTraits.h" using namespace llvm; @@ -392,6 +393,17 @@ llvm_unreachable("Unknown section type"); break; } + + // Only propagate the section size encoding length if it's not the minimal + // size or 5 (the default 
"padded" value). This is to avoid having every + // YAML output polluted with this value when we usually don't care about it + // (and avoid rewriting all the test expectations). + if (WasmSec.HeaderSecSizeEncodingLen && + WasmSec.HeaderSecSizeEncodingLen != + getULEB128Size(WasmSec.Content.size()) && + WasmSec.HeaderSecSizeEncodingLen != 5) + S->HeaderSecSizeEncodingLen = WasmSec.HeaderSecSizeEncodingLen; + for (const wasm::WasmRelocation &Reloc : WasmSec.Relocations) { WasmYAML::Relocation R; R.Type = Reloc.Type;