Index: lld/COFF/Driver.h =================================================================== --- lld/COFF/Driver.h +++ lld/COFF/Driver.h @@ -20,6 +20,7 @@ #include "llvm/Object/COFF.h" #include "llvm/Option/Arg.h" #include "llvm/Option/ArgList.h" +#include "llvm/Support/TarWriter.h" #include #include #include @@ -74,7 +75,7 @@ ArgParser Parser; SymbolTable Symtab; - std::unique_ptr Cpio; // for /linkrepro + std::unique_ptr Tar; // for /linkrepro // Opens a file. Path has to be resolved already. MemoryBufferRef openFile(StringRef Path); Index: lld/COFF/Driver.cpp =================================================================== --- lld/COFF/Driver.cpp +++ lld/COFF/Driver.cpp @@ -24,7 +24,9 @@ #include "llvm/Option/Option.h" #include "llvm/Support/Debug.h" #include "llvm/Support/Path.h" +#include "llvm/Support/Path.h" #include "llvm/Support/Process.h" +#include "llvm/Support/TarWriter.h" #include "llvm/Support/TargetSelect.h" #include "llvm/Support/raw_ostream.h" #include @@ -98,9 +100,9 @@ MemoryBufferRef MBRef = *MB; OwningMBs.push_back(std::move(MB)); - if (Driver->Cpio) - Driver->Cpio->append(relativeToRoot(MBRef.getBufferIdentifier()), - MBRef.getBuffer()); + if (Driver->Tar) + Driver->Tar->append(relativeToRoot(MBRef.getBufferIdentifier()), + MBRef.getBuffer()); return MBRef; } @@ -459,12 +461,16 @@ if (auto *Arg = Args.getLastArg(OPT_linkrepro)) { SmallString<64> Path = StringRef(Arg->getValue()); sys::path::append(Path, "repro"); - ErrorOr F = CpioFile::create(Path); - if (F) - Cpio.reset(*F); - else + + Expected> ErrOrWriter = TarWriter::create( + (StringRef(Path) + ".tar").str(), llvm::sys::path::filename(Path)); + + if (ErrOrWriter) { + Tar = std::move(*ErrOrWriter); + } else { errs() << "/linkrepro: failed to open " << Path - << ".cpio: " << F.getError().message() << '\n'; + << ".tar: " << toString(ErrOrWriter.takeError()) << '\n'; + } } if (Args.filtered_begin(OPT_INPUT) == Args.filtered_end()) @@ -683,10 +689,10 @@ if (!Resources.empty()) 
addBuffer(convertResToCOFF(Resources)); - if (Cpio) - Cpio->append("response.txt", - createResponseFile(Args, FilePaths, - ArrayRef(SearchPaths).slice(1))); + if (Tar) + Tar->append("response.txt", + createResponseFile(Args, FilePaths, + ArrayRef(SearchPaths).slice(1))); // Handle /largeaddressaware if (Config->is64() || Args.hasArg(OPT_largeaddressaware)) Index: lld/ELF/Driver.h =================================================================== --- lld/ELF/Driver.h +++ lld/ELF/Driver.h @@ -17,6 +17,7 @@ #include "llvm/ADT/StringRef.h" #include "llvm/ADT/StringSet.h" #include "llvm/Option/ArgList.h" +#include "llvm/Support/TarWriter.h" #include "llvm/Support/raw_ostream.h" namespace lld { @@ -29,7 +30,7 @@ void main(ArrayRef Args, bool CanExitEarly); void addFile(StringRef Path); void addLibrary(StringRef Name); - std::unique_ptr Cpio; // for reproduce + std::unique_ptr Tar; // for reproduce private: std::vector getArchiveMembers(MemoryBufferRef MB); Index: lld/ELF/Driver.cpp =================================================================== --- lld/ELF/Driver.cpp +++ lld/ELF/Driver.cpp @@ -25,6 +25,7 @@ #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/Path.h" #include "llvm/Support/TargetSelect.h" #include "llvm/Support/raw_ostream.h" #include @@ -182,8 +183,8 @@ MemoryBufferRef MBRef = MB->getMemBufferRef(); make>(std::move(MB)); // take MB ownership - if (Cpio) - Cpio->append(relativeToRoot(Path), MBRef.getBuffer()); + if (Tar) + Tar->append(relativeToRoot(Path), MBRef.getBuffer()); return MBRef; } @@ -309,14 +310,16 @@ if (const char *Path = getReproduceOption(Args)) { // Note that --reproduce is a debug option so you can ignore it // if you are trying to understand the whole picture of the code. 
- ErrorOr F = CpioFile::create(Path); - if (F) { - Cpio.reset(*F); - Cpio->append("response.txt", createResponseFile(Args)); - Cpio->append("version.txt", getLLDVersion() + "\n"); - } else - error(F.getError(), - Twine("--reproduce: failed to open ") + Path + ".cpio"); + Expected> ErrOrWriter = TarWriter::create( + (StringRef(Path) + ".tar").str(), path::filename(Path)); + if (ErrOrWriter) { + Tar = std::move(*ErrOrWriter); + Tar->append("response.txt", createResponseFile(Args)); + Tar->append("version.txt", getLLDVersion() + "\n"); + } else { + error(Twine("--reproduce: failed to open ") + Path + ".tar: " + + toString(ErrOrWriter.takeError())); + } } readConfigs(Args); Index: lld/ELF/InputFiles.cpp =================================================================== --- lld/ELF/InputFiles.cpp +++ lld/ELF/InputFiles.cpp @@ -525,9 +525,9 @@ "could not get the buffer for the member defining symbol " + Sym->getName()); - if (C.getParent()->isThin() && Driver->Cpio) - Driver->Cpio->append(relativeToRoot(check(C.getFullName())), - Ret.getBuffer()); + if (C.getParent()->isThin() && Driver->Tar) + Driver->Tar->append(relativeToRoot(check(C.getFullName())), + Ret.getBuffer()); if (C.getParent()->isThin()) return {Ret, 0}; return {Ret, C.getChildOffset()}; Index: lld/include/lld/Core/Reproduce.h =================================================================== --- lld/include/lld/Core/Reproduce.h +++ lld/include/lld/Core/Reproduce.h @@ -11,46 +11,15 @@ #define LLD_CORE_REPRODUCE_H #include "lld/Core/LLVM.h" -#include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/StringRef.h" -#include "llvm/ADT/StringSet.h" #include "llvm/Support/Error.h" namespace llvm { - -class raw_fd_ostream; - namespace opt { class Arg; } - } namespace lld { -// This class creates a .cpio file for --reproduce (ELF) or /linkrepro (COFF). 
-// -// If "--reproduce foo" is given, we create a file "foo.cpio" and -// copy all input files to the archive, along with a response file -// to re-run the same command with the same inputs. -// It is useful for reporting issues to LLD developers. -// -// Cpio as a file format is a deliberate choice. It's standardized in -// POSIX and very easy to create. cpio command is available virtually -// on all Unix systems. See -// http://pubs.opengroup.org/onlinepubs/9699919799/utilities/pax.html#tag_20_92_13_07 -// for the format details. -class CpioFile { -public: - static ErrorOr create(StringRef OutputPath); - void append(StringRef Path, StringRef Data); - -private: - CpioFile(std::unique_ptr OS, StringRef Basename); - - std::unique_ptr OS; - llvm::StringSet<> Seen; - std::string Basename; -}; - // Makes a given pathname an absolute path first, and then remove // beginning /. For example, "../foo.o" is converted to "home/john/foo.o", // assuming that the current directory is "/home/john/bar". 
Index: lld/lib/Core/Reproduce.cpp =================================================================== --- lld/lib/Core/Reproduce.cpp +++ lld/lib/Core/Reproduce.cpp @@ -8,66 +8,14 @@ //===----------------------------------------------------------------------===// #include "lld/Core/Reproduce.h" -#include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/Twine.h" #include "llvm/Option/Arg.h" #include "llvm/Support/Error.h" #include "llvm/Support/FileSystem.h" -#include "llvm/Support/Format.h" #include "llvm/Support/Path.h" using namespace lld; using namespace llvm; -using namespace sys; - -CpioFile::CpioFile(std::unique_ptr OS, StringRef S) - : OS(std::move(OS)), Basename(S) {} - -ErrorOr CpioFile::create(StringRef OutputPath) { - std::string Path = (OutputPath + ".cpio").str(); - std::error_code EC; - auto OS = llvm::make_unique(Path, EC, sys::fs::F_None); - if (EC) - return EC; - return new CpioFile(std::move(OS), path::filename(OutputPath)); -} - -static void writeMember(raw_fd_ostream &OS, StringRef Path, StringRef Data) { - // The c_dev/c_ino pair should be unique according to the spec, - // but no one seems to care. - OS << "070707"; // c_magic - OS << "000000"; // c_dev - OS << "000000"; // c_ino - OS << "100664"; // c_mode: C_ISREG | rw-rw-r-- - OS << "000000"; // c_uid - OS << "000000"; // c_gid - OS << "000001"; // c_nlink - OS << "000000"; // c_rdev - OS << "00000000000"; // c_mtime - OS << format("%06o", Path.size() + 1); // c_namesize - OS << format("%011o", Data.size()); // c_filesize - OS << Path << '\0'; // c_name - OS << Data; // c_filedata -} - -void CpioFile::append(StringRef Path, StringRef Data) { - if (!Seen.insert(Path).second) - return; - - // Construct an in-archive filename so that /home/foo/bar is stored - // as baz/home/foo/bar where baz is the basename of the output file. - // (i.e. in that case we are creating baz.cpio.) 
- SmallString<128> Fullpath; - path::append(Fullpath, Basename, Path); - - writeMember(*OS, convertToUnixPathSeparator(Fullpath), Data); - - // Print the trailer and seek back. - // This way we have a valid archive if we crash. - uint64_t Pos = OS->tell(); - writeMember(*OS, "TRAILER!!!", ""); - OS->seek(Pos); -} +using namespace llvm::sys; // Makes a given pathname an absolute path first, and then remove // beginning /. For example, "../foo.o" is converted to "home/john/foo.o", @@ -76,7 +24,7 @@ // a mess with backslash-as-escape and backslash-as-path-separator. std::string lld::relativeToRoot(StringRef Path) { SmallString<128> Abs = Path; - if (sys::fs::make_absolute(Abs)) + if (fs::make_absolute(Abs)) return Path; path::remove_dots(Abs, /*remove_dot_dot=*/true); Index: lld/test/COFF/linkrepro.test =================================================================== --- lld/test/COFF/linkrepro.test +++ lld/test/COFF/linkrepro.test @@ -1,6 +1,3 @@ -# cpio fails on windows with "Function not implemented". -# REQUIRES: shell - # RUN: rm -rf %t.dir # RUN: mkdir -p %t.dir/build1 %t.dir/build2 %t.dir/build3 # RUN: yaml2obj < %p/Inputs/hello32.yaml > %t.obj @@ -8,7 +5,7 @@ # RUN: cd %t.dir/build1 # RUN: lld-link %t.obj %p/Inputs/std32.lib /subsystem:console \ # RUN: /entry:main@0 /linkrepro:. /out:%t.exe -# RUN: cpio -id < repro.cpio +# RUN: tar xf repro.tar # RUN: diff %t.obj repro/%:t.obj # RUN: diff %p/Inputs/std32.lib repro/%:p/Inputs/std32.lib # RUN: FileCheck %s --check-prefix=RSP < repro/response.txt @@ -16,7 +13,7 @@ # RUN: cd %t.dir/build2 # RUN: lld-link %t.obj /libpath:%p/Inputs /defaultlib:std32 /subsystem:console \ # RUN: /entry:main@0 /linkrepro:. 
/out:%t.exe -# RUN: cpio -id < repro.cpio +# RUN: tar xf repro.tar # RUN: diff %t.obj repro/%:t.obj # RUN: diff %p/Inputs/std32.lib repro/%:p/Inputs/std32.lib # RUN: FileCheck %s --check-prefix=RSP < repro/response.txt @@ -24,7 +21,7 @@ # RUN: cd %t.dir/build3 # RUN: env LIB=%p/Inputs lld-link %t.obj /defaultlib:std32 /subsystem:console \ # RUN: /entry:main@0 /linkrepro:. /out:%t.exe -# RUN: cpio -id < repro.cpio +# RUN: tar xf repro.tar # RUN: diff %t.obj repro/%:t.obj # RUN: diff %p/Inputs/std32.lib repro/%:p/Inputs/std32.lib # RUN: FileCheck %s --check-prefix=RSP < repro/response.txt Index: lld/test/ELF/reproduce-backslash.s =================================================================== --- lld/test/ELF/reproduce-backslash.s +++ lld/test/ELF/reproduce-backslash.s @@ -1,9 +1,9 @@ -# REQUIRES: x86, cpio, shell +# REQUIRES: x86, shell # Test that we don't erroneously replace \ with / on UNIX, as it's # legal for a filename to contain backslashes. # RUN: llvm-mc %s -o foo\\.o -filetype=obj -triple=x86_64-pc-linux # RUN: ld.lld foo\\.o --reproduce repro -# RUN: cpio -it < repro.cpio | FileCheck %s +# RUN: tar tf repro.tar | FileCheck %s -# CHECK: repro/{{.*}}/foo\.o +# CHECK: repro/{{.*}}/foo\\.o Index: lld/test/ELF/reproduce-error.s =================================================================== --- lld/test/ELF/reproduce-error.s +++ lld/test/ELF/reproduce-error.s @@ -1,4 +1,4 @@ -# Extracting the cpio archive can get over the path limit on windows. +# Extracting the tar archive can get over the path limit on windows. 
# REQUIRES: shell # RUN: rm -rf %t.dir @@ -8,8 +8,7 @@ # RUN: not ld.lld --reproduce repro abc -o t 2>&1 | FileCheck %s # CHECK: cannot open abc: {{N|n}}o such file or directory -# RUN: grep TRAILER repro.cpio -# RUN: cpio -id < repro.cpio +# RUN: tar xf repro.tar # RUN: FileCheck --check-prefix=RSP %s < repro/response.txt # RSP: abc # RSP: -o t Index: lld/test/ELF/reproduce-linkerscript.s =================================================================== --- lld/test/ELF/reproduce-linkerscript.s +++ lld/test/ELF/reproduce-linkerscript.s @@ -6,7 +6,7 @@ # RUN: echo "INPUT(\"%t.dir/build/foo.o\")" > %t.dir/build/foo.script # RUN: cd %t.dir # RUN: ld.lld build/foo.script -o bar --reproduce repro -# RUN: cpio -id < repro.cpio +# RUN: tar xf repro.tar # RUN: diff build/foo.script repro/%:t.dir/build/foo.script # RUN: diff build/foo.o repro/%:t.dir/build/foo.o Index: lld/test/ELF/reproduce-thin-archive.s =================================================================== --- lld/test/ELF/reproduce-thin-archive.s +++ lld/test/ELF/reproduce-thin-archive.s @@ -6,7 +6,7 @@ # RUN: cd %t.dir # RUN: llvm-ar --format=gnu rcT foo.a foo.o # RUN: ld.lld -m elf_x86_64 foo.a -o bar --reproduce repro -# RUN: cpio -id < repro.cpio +# RUN: tar xf repro.tar # RUN: diff foo.a repro/%:t.dir/foo.a # RUN: diff foo.o repro/%:t.dir/foo.o Index: lld/test/ELF/reproduce-windows.s =================================================================== --- lld/test/ELF/reproduce-windows.s +++ lld/test/ELF/reproduce-windows.s @@ -1,4 +1,4 @@ -# REQUIRES: x86, cpio +# REQUIRES: x86 # Test that a repro archive always uses / instead of \. 
# RUN: rm -rf %t.dir @@ -6,7 +6,7 @@ # RUN: llvm-mc %s -o %t.dir/build/foo.o -filetype=obj -triple=x86_64-pc-linux # RUN: cd %t.dir # RUN: ld.lld build/foo.o --reproduce repro -# RUN: cpio -it < repro.cpio | FileCheck %s +# RUN: tar tf repro.tar | FileCheck %s # CHECK: repro/response.txt # CHECK: repro/{{.*}}/build/foo.o Index: lld/test/ELF/reproduce-windows2.s =================================================================== --- lld/test/ELF/reproduce-windows2.s +++ lld/test/ELF/reproduce-windows2.s @@ -1,4 +1,4 @@ -# REQUIRES: system-windows, x86, cpio +# REQUIRES: system-windows, x86 # Test that a response.txt file always uses / instead of \. # RUN: rm -rf %t.dir @@ -6,6 +6,5 @@ # RUN: llvm-mc %s -o %t.dir/build/foo.o -filetype=obj -triple=x86_64-pc-linux # RUN: cd %t.dir # RUN: ld.lld build/foo.o --reproduce repro -# RUN: echo "*response.txt" > list.txt -# RUN: cpio -i --to-stdout --pattern-file=list.txt < repro.cpio | FileCheck %s +# RUN: tar -O -x -f repro.tar repro/response.txt | FileCheck %s # CHECK: {{.*}}/build/foo.o Index: lld/test/ELF/reproduce.s =================================================================== --- lld/test/ELF/reproduce.s +++ lld/test/ELF/reproduce.s @@ -1,6 +1,6 @@ -# REQUIRES: x86, cpio +# REQUIRES: x86 -# Extracting the cpio archive can get over the path limit on windows. +# Extracting the tar archive can get over the path limit on windows. 
# REQUIRES: shell # RUN: rm -rf %t.dir @@ -8,7 +8,7 @@ # RUN: llvm-mc -filetype=obj -triple=x86_64-unknown-linux %s -o %t.dir/build1/foo.o # RUN: cd %t.dir # RUN: ld.lld --hash-style=gnu build1/foo.o -o bar -shared --as-needed --reproduce repro -# RUN: cpio -id < repro.cpio +# RUN: tar xf repro.tar # RUN: diff build1/foo.o repro/%:t.dir/build1/foo.o # RUN: FileCheck %s --check-prefix=RSP < repro/response.txt @@ -26,7 +26,7 @@ # RUN: llvm-mc -filetype=obj -triple=x86_64-unknown-linux %s -o %t.dir/build2/foo.o # RUN: cd %t.dir/build2/a/b/c # RUN: env LLD_REPRODUCE=repro ld.lld ./../../../foo.o -o bar -shared --as-needed -# RUN: cpio -id < repro.cpio +# RUN: tar xf repro.tar # RUN: diff %t.dir/build2/foo.o repro/%:t.dir/build2/foo.o # RUN: echo "{ local: *; };" > ver @@ -37,7 +37,7 @@ # RUN: ld.lld --reproduce repro2 'foo bar' -L"foo bar" -Lfile -Tfile2 \ # RUN: --dynamic-list dyn -rpath file --script=file --version-script ver \ # RUN: --dynamic-linker "some unusual/path" -soname 'foo bar' -soname='foo bar' -# RUN: cpio -id < repro2.cpio +# RUN: tar xf repro2.tar # RUN: FileCheck %s --check-prefix=RSP2 < repro2/response.txt # RSP2: "{{.*}}foo bar" # RSP2-NEXT: -L "{{.*}}foo bar" @@ -51,7 +51,7 @@ # RSP2-NEXT: -soname="foo bar" # RSP2-NEXT: -soname="foo bar" -# RUN: cpio -it < repro2.cpio | FileCheck %s +# RUN: tar tf repro2.tar | FileCheck %s # CHECK: repro2/response.txt # CHECK-NEXT: repro2/version.txt # CHECK-NEXT: repro2/{{.*}}/dyn Index: lld/test/lit.cfg =================================================================== --- lld/test/lit.cfg +++ lld/test/lit.cfg @@ -258,7 +258,3 @@ rc = lit.util.which('rc', config.environment['PATH']) if cvtres and rc: config.available_features.add('winres') - -# Check if "cpio" command exists. 
-if lit.util.which('cpio', config.environment['PATH']):
-    config.available_features.add('cpio')
Index: llvm/include/llvm/Support/TarWriter.h
===================================================================
--- /dev/null
+++ llvm/include/llvm/Support/TarWriter.h
@@ -0,0 +1,37 @@
+//===-- llvm/Support/TarWriter.h - Tar archive file creator -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SUPPORT_TAR_WRITER_H
+#define LLVM_SUPPORT_TAR_WRITER_H
+
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/raw_ostream.h"
+
+namespace llvm {
+// Creates a tar archive file, one append() call per archive member.
+class TarWriter {
+public:
+  // Opens OutputPath for writing and returns a writer for it, or an error
+  // if the file cannot be opened. Member names are prefixed with BaseDir.
+  static Expected<std::unique_ptr<TarWriter>> create(StringRef OutputPath,
+                                                     StringRef BaseDir);
+
+  // Appends a member named Path with contents Data to the archive.
+  void append(StringRef Path, StringRef Data);
+
+private:
+  // Wraps the already-open descriptor FD; use create() to get an instance.
+  TarWriter(int FD, StringRef BaseDir);
+  raw_fd_ostream OS;   // Stream the archive is written to.
+  std::string BaseDir; // Prefix for every member name.
+};
+}
+
+#endif
Index: llvm/lib/Support/CMakeLists.txt
===================================================================
--- llvm/lib/Support/CMakeLists.txt
+++ llvm/lib/Support/CMakeLists.txt
@@ -91,6 +91,7 @@
   StringSaver.cpp
   StringRef.cpp
   SystemUtils.cpp
+  TarWriter.cpp
   TargetParser.cpp
   ThreadPool.cpp
   Timer.cpp
Index: llvm/lib/Support/TarWriter.cpp
===================================================================
--- /dev/null
+++ llvm/lib/Support/TarWriter.cpp
@@ -0,0 +1,169 @@
+//===-- TarWriter.cpp - Tar archive file creator --------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// TarWriter class provides a feature to create a tar archive file.
+//
+// I put emphasis on simplicity over comprehensiveness when
+// implementing this class because we don't need a full-fledged
+// archive file generator in LLVM at the moment.
+//
+// The filename field in the Unix V7 tar header is 100 bytes, which is
+// apparently too small. Various extensions were implemented to fix the
+// issue. The writer implemented in this file emits PAX extension headers.
+//
+// Note that we emit PAX headers even if filenames fit in the V7
+// header for the sake of simplicity. So, generated files are N
+// kilobytes larger than the ideal where N is the number of files in
+// archives. In practice, you don't need to worry about that.
+//
+// The PAX header is standardized in IEEE Std 1003.1-2001.
+//
+// POSIX specifies that tar archives end with two null blocks, but it
+// looks like both GNU and BSD tar commands handle file EOFs as
+// terminators, so we don't bother to emit terminators.
+//
+// The struct definition of UstarHeader is copied from
+// https://www.freebsd.org/cgi/man.cgi?query=tar&sektion=5
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/TarWriter.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/FileSystem.h"
+
+using namespace llvm;
+
+// Each file in an archive must be aligned to this block size.
+static const int BlockSize = 512;
+
+// Layout of one header block; it is written to the archive byte for byte.
+struct UstarHeader {
+  char Name[100];
+  char Mode[8];     // Set to "0000664" in regular-file headers.
+  char Uid[8];
+  char Gid[8];
+  char Size[12];    // Payload size as octal digits.
+  char Mtime[12];
+  char Checksum[8]; // Filled in by computeChecksum().
+  char TypeFlag;    // 'x' marks a PAX extended header.
+  char Linkname[100];
+  char Magic[6];    // "ustar"
+  char Version[2];
+  char Uname[32];
+  char Gname[32];
+  char DevMajor[8];
+  char DevMinor[8];
+  char Prefix[155];
+  char Pad[12];
+};
+static_assert(sizeof(UstarHeader) == BlockSize, "invalid Ustar header");
+
+// A PAX attribute is in the form of "<length> <key>=<value>\n"
+// where <length> is the length of the entire string including
+// the length field itself. An example string is this.
+//
+//   25 ctime=1084839148.1212\n
+//
+// This function creates such a string from Key and Val.
+static std::string formatPax(StringRef Key, const Twine &Val) {
+  int Len = Key.size() + Val.str().size() + 3; // +3 for " ", "=" and "\n"
+
+  // We need to compute total size twice because appending
+  // a length field could change total size by one.
+  int Total = Len + Twine(Len).str().size();
+  Total = Len + Twine(Total).str().size();
+  return (Twine(Total) + " " + Key + "=" + Val + "\n").str();
+}
+
+// Headers in tar files must be aligned to 512 byte boundaries. This function
+// writes null bytes so that the file is a multiple of 512 bytes.
+static void pad(raw_ostream &OS) {
+  uint64_t Pos = OS.tell();
+  if (Pos % BlockSize == 0)
+    return;
+  OS << std::string(BlockSize - Pos % BlockSize, '\0');
+}
+
+// Computes a checksum for a tar header and stores it in the header.
+static void computeChecksum(UstarHeader &Hdr) {
+  // The checksum field must be all spaces while the checksum is computed.
+  for (size_t I = 0; I < sizeof(Hdr.Checksum); ++I)
+    Hdr.Checksum[I] = ' ';
+
+  // Sum the header as unsigned bytes and store the result as octal digits.
+  unsigned Chksum = 0;
+  for (size_t I = 0; I < sizeof(Hdr); ++I)
+    Chksum += reinterpret_cast<const unsigned char *>(&Hdr)[I];
+  snprintf(Hdr.Checksum, sizeof(Hdr.Checksum), "%06o", Chksum);
+}
+
+// Creates a PAX extended header for Path and writes it to OS.
+static void writePaxHeader(raw_ostream &OS, const Twine &Path) {
+  // A PAX header consists of a 512-byte header followed
+  // by key-value strings. First, create key-value strings.
+  std::string PaxAttr = formatPax("path", Path);
+
+  // Create a 512-byte header. size_t is not unsigned long on every
+  // platform, so the size is widened to match "%llo".
+  UstarHeader Hdr = {};
+  snprintf(Hdr.Size, sizeof(Hdr.Size), "%011llo",
+           static_cast<unsigned long long>(PaxAttr.size()));
+  Hdr.TypeFlag = 'x';            // PAX magic
+  memcpy(Hdr.Magic, "ustar", 6); // Ustar magic
+  computeChecksum(Hdr);
+
+  // Write them down.
+  OS << StringRef(reinterpret_cast<const char *>(&Hdr), sizeof(Hdr));
+  OS << PaxAttr;
+  pad(OS);
+}
+
+// The PAX header is an extended format, so a PAX header needs
+// to be followed by a "real" header.
+static void writeUstarHeader(raw_ostream &OS, size_t Size) {
+  UstarHeader Hdr = {};
+  strcpy(Hdr.Mode, "0000664");
+  // size_t is not unsigned long on every platform, so widen it for "%llo".
+  snprintf(Hdr.Size, sizeof(Hdr.Size), "%011llo",
+           static_cast<unsigned long long>(Size));
+  memcpy(Hdr.Magic, "ustar", 6);
+  computeChecksum(Hdr);
+  OS << StringRef(reinterpret_cast<const char *>(&Hdr), sizeof(Hdr));
+}
+
+// Canonicalizes a path so that '/' is the separator even on Windows.
+static std::string canonicalize(StringRef S) {
+#ifdef LLVM_ON_WIN32
+  std::string Ret = S.str();
+  std::replace(Ret.begin(), Ret.end(), '\\', '/');
+  return Ret;
+#else
+  return S.str();
+#endif
+}
+
+// Opens OutputPath and returns a TarWriter for it, or an error on failure.
+Expected<std::unique_ptr<TarWriter>> TarWriter::create(StringRef OutputPath,
+                                                       StringRef BaseDir) {
+  int FD;
+  if (std::error_code EC = openFileForWrite(OutputPath, FD, sys::fs::F_None))
+    return make_error<StringError>("cannot open " + OutputPath, EC);
+  return std::unique_ptr<TarWriter>(new TarWriter(FD, BaseDir));
+}
+
+TarWriter::TarWriter(int FD, StringRef BaseDir)
+    : OS(FD, /*shouldClose=*/true, /*unbuffered=*/true), BaseDir(BaseDir) {}
+
+// Appends Data to the archive as a member named BaseDir/Path.
+void TarWriter::append(StringRef Path, StringRef Data) {
+  writePaxHeader(OS, BaseDir + "/" + canonicalize(Path));
+  writeUstarHeader(OS, Data.size());
+  OS << Data;
+  pad(OS);
+}