diff --git a/mlir/include/mlir/Support/DataFile.h b/mlir/include/mlir/Support/DataFile.h new file mode 100644 --- /dev/null +++ b/mlir/include/mlir/Support/DataFile.h @@ -0,0 +1,62 @@ +//===- DataFile.h - I/O support for MLIR Data Files -------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// A number of built-in MLIR attributes can represent arbitrarily large data +// which is not suitable for direct inclusion in a module. There are various +// mechanisms for externalizing this data, and hooks exist for projects to +// bring their own serialization formats. This file, and the corresponding +// mlir-data-file tool provide a basic implementation of a binary file format +// that should be appropriate for many such use cases. +// +// Logically, an MLIR Data File presents in either read or write mode. When +// reading, it maps an arbitrary symbol name to some binary contents. When +// writing, the Data File can have duplicate symbols appended, with the last +// one winning. +// +// Physically, an MLIR Data File consists of: +// +// ``` +// (File_Header_Record) +// ( +// (Content_Record)* +// (TOC_Record)* +// (Section_Trailer_Record) +// )* +// ``` +// +// All records are minimally aligned to 16 byte offsets within the physical +// file, and Content_Records can be individually aligned to larger values. +// +// Where the file format uses integers, they are encoded as little endian. +// +// Since Content_Records are opaque and referred to by physical offset in the +// TOC, the Data File can be used in contexts disconnected from MLIR (i.e. 
+// as an auxiliary file to some other assembly that just references contents +// by offset), and such usage can be optimized by first compacting and stripping +// TOC records. +// +// Data files contain a file format version which must be supported by a +// corresponding API that wishes to manipulate file metadata. This API only +// supports writing to a data file at the same version as itself. Reading +// historic versions should be supported. +//===----------------------------------------------------------------------===// + +#ifndef MLIR_SUPPORT_DATA_FILE_H_ +#define MLIR_SUPPORT_DATA_FILE_H_ + +#include "llvm/Support/Error.h" + +namespace mlir { + +class DataFileBuilder { // Empty placeholder: no members are declared yet. +public: +}; + +} // namespace mlir + +#endif // MLIR_SUPPORT_DATA_FILE_H_ diff --git a/mlir/lib/Support/CMakeLists.txt b/mlir/lib/Support/CMakeLists.txt --- a/mlir/lib/Support/CMakeLists.txt +++ b/mlir/lib/Support/CMakeLists.txt @@ -1,4 +1,5 @@ set(LLVM_OPTIONAL_SOURCES + DataFile.cpp DebugCounter.cpp FileUtilities.cpp IndentedOstream.cpp @@ -10,6 +11,7 @@ ) add_mlir_library(MLIRSupport + DataFile.cpp DebugCounter.cpp FileUtilities.cpp IndentedOstream.cpp diff --git a/mlir/lib/Support/DataFile.cpp b/mlir/lib/Support/DataFile.cpp new file mode 100644 --- /dev/null +++ b/mlir/lib/Support/DataFile.cpp @@ -0,0 +1,11 @@ +//===- DataFile.cpp - MLIR Data File Library ------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "mlir/Support/DataFile.h" + +using namespace mlir; diff --git a/mlir/tools/CMakeLists.txt b/mlir/tools/CMakeLists.txt --- a/mlir/tools/CMakeLists.txt +++ b/mlir/tools/CMakeLists.txt @@ -1,3 +1,4 @@ +add_subdirectory(mlir-data-file) add_subdirectory(mlir-lsp-server) add_subdirectory(mlir-opt) add_subdirectory(mlir-parser-fuzzer) diff --git a/mlir/tools/mlir-data-file/CMakeLists.txt b/mlir/tools/mlir-data-file/CMakeLists.txt new file mode 100644 --- /dev/null +++ b/mlir/tools/mlir-data-file/CMakeLists.txt @@ -0,0 +1,10 @@ +set(LLVM_LINK_COMPONENTS + Support + ) + +add_llvm_tool(mlir-data-file mlir-data-file.cpp) +llvm_update_compile_flags(mlir-data-file) +target_link_libraries(mlir-data-file PRIVATE + MLIRSupport + MLIRParser +) diff --git a/mlir/tools/mlir-data-file/mlir-data-file.cpp b/mlir/tools/mlir-data-file/mlir-data-file.cpp new file mode 100644 --- /dev/null +++ b/mlir/tools/mlir-data-file/mlir-data-file.cpp @@ -0,0 +1,212 @@ +//===- mlir-data-file.cpp - MLIR Data File tool ---------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Operates on MLIR DataFiles. This tool doubles as both a testing tool of +// the format and utilities and as a maintenance tool (i.e. compacting, +// stripping, etc). 
+// +//===----------------------------------------------------------------------===// + +#include "mlir/Support/DataFile.h" +#include "llvm/Support/BinaryByteStream.h" +#include "llvm/Support/BinaryStreamWriter.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/FileOutputBuffer.h" +#include "llvm/Support/FileSystem.h" + +using namespace llvm; +using namespace mlir; + +//===----------------------------------------------------------------------===// +// DataFileBinaryStream +// The stock implementation of DataFileBinaryStream assumes a fixed size, +// write once modality. When building a Data File, we are operating on +// a backing file of unknown length and we want to operate on the file +// directly. +// TODO: Split this into a read-only base class and a writable subclass. +//===----------------------------------------------------------------------===// + +class DataFileBinaryStream : public WritableBinaryStream { +public: + ~DataFileBinaryStream() override; + + static Expected> + openForReadWrite(StringRef filePath); + + llvm::support::endianness getEndian() const override { + return llvm::support::endianness::little; + } + + Error readBytes(uint64_t Offset, uint64_t Size, + ArrayRef &Buffer) override; + + Error readLongestContiguousChunk(uint64_t Offset, + ArrayRef &Buffer) override; + + uint64_t getLength() override; + + BinaryStreamFlags getFlags() const override { + if (writable) { + return BSF_Write | BSF_Append; + } else { + return BSF_None; + } + } + + Error writeBytes(uint64_t Offset, ArrayRef Data) override; + + Error commit() override; + +private: + DataFileBinaryStream(llvm::sys::fs::file_t file, uint64_t fileSize, + bool writable) + : file(file), os(file, /*shouldClose=*/false, /*unbuffered=*/true), + fileSize(fileSize), writable(writable) { + // Since we don't use the read buffer for bulk data-access, we can be + // fairly small here. 
+ readBuffer.resize(4096); + } + llvm::sys::fs::file_t file; + raw_fd_ostream os; + // We fulfill read requests from this buffer. + std::vector readBuffer; + uint64_t fileSize; + bool writable; +}; + +Expected> +DataFileBinaryStream::openForReadWrite(StringRef filePath) { + // Open file. + auto file = llvm::sys::fs::openNativeFileForReadWrite( + filePath, llvm::sys::fs::CD_OpenAlways, llvm::sys::fs::OF_None); + if (auto E = file.takeError()) { + return std::move(E); + } + + // Get its current size. + llvm::sys::fs::file_status fileStatus; + if (auto ec = llvm::sys::fs::status(*file, fileStatus)) { + llvm::sys::fs::closeFile(*file); + return errorCodeToError(ec); + } + + return std::unique_ptr( + new DataFileBinaryStream(std::move(*file), fileStatus.getSize(), true)); +} + +DataFileBinaryStream::~DataFileBinaryStream() { + llvm::sys::fs::closeFile(file); +} + +Error DataFileBinaryStream::commit() { return Error::success(); } + +uint64_t DataFileBinaryStream::getLength() { return fileSize; } + +Error DataFileBinaryStream::readBytes(uint64_t Offset, uint64_t Size, + ArrayRef &Buffer) { + auto actualSize = llvm::sys::fs::readNativeFileSlice( + file, + MutableArrayRef(reinterpret_cast(readBuffer.data()), + readBuffer.size()), + Offset); + if (auto E = actualSize.takeError()) { + return E; + } + Buffer = ArrayRef(readBuffer.data(), *actualSize); + return Error::success(); +} + +Error DataFileBinaryStream::readLongestContiguousChunk( + uint64_t Offset, ArrayRef &Buffer) { + return readBytes(Offset, readBuffer.size(), Buffer); +} + +Error DataFileBinaryStream::writeBytes(uint64_t Offset, + ArrayRef Data) { + os.pwrite(reinterpret_cast(Data.data()), Data.size(), Offset); + auto ec = os.error(); + if (ec) { + os.clear_error(); + return errorCodeToError(ec); + } + + fileSize = std::max(fileSize, Offset + Data.size()); + return Error::success(); +} + +//===----------------------------------------------------------------------===// +// Create SubCommand 
+//===----------------------------------------------------------------------===//
+
+namespace {
+/// Command-line options and entry point for the `create` subcommand.
+struct CreateSubCommand {
+  // NOTE(review): both positional descriptions look like they lost their
+  // angle-bracket placeholders (one is empty, the other is "( )*"); the
+  // intended text is not recoverable from here, so they are left as-is.
+  CreateSubCommand()
+      : subCommand("create", "Creates an MLIR Data File from the command line"),
+        outputFile(cl::Positional, cl::sub(subCommand),
+                   cl::desc(""), cl::Required),
+        specs(cl::Positional, cl::sub(subCommand),
+              cl::desc("( )*"), cl::ZeroOrMore) {}
+
+  cl::SubCommand subCommand;
+  /// Path of the data file to create or open.
+  cl::opt<std::string> outputFile;
+  /// Remaining positional arguments; parsed but not yet consumed by run().
+  cl::list<std::string> specs;
+
+  /// Runs the subcommand. Returns 0 on success and 1 after printing a
+  /// diagnostic to stderr on failure.
+  int run();
+};
+} // namespace
+
+int CreateSubCommand::run() {
+  // Open file.
+  auto expectedStream = DataFileBinaryStream::openForReadWrite(outputFile);
+  if (!expectedStream) {
+    errs() << "Could not create output file (" << outputFile
+           << "): " << toString(expectedStream.takeError()) << "\n";
+    return 1;
+  }
+
+  // Access stream. The writer starts at offset 0 of the stream.
+  std::unique_ptr<DataFileBinaryStream> stream = std::move(*expectedStream);
+  BinaryStreamWriter writer(*stream);
+
+  // Placeholder payload: two C strings with the second one aligned to 64.
+  if (auto E = writer.writeCString("Foobar!\n")) {
+    errs() << "Error writing: " << toString(std::move(E)) << "\n";
+    return 1;
+  }
+
+  if (auto E = writer.padToAlignment(64)) {
+    errs() << "Error padding: " << toString(std::move(E)) << "\n";
+    return 1;
+  }
+
+  if (auto E = writer.writeCString("Still here!\n")) {
+    errs() << "Error writing: " << toString(std::move(E)) << "\n";
+    return 1;
+  }
+
+  // Commit.
+  if (auto E = stream->commit()) {
+    errs() << "Error committing output file: " << toString(std::move(E))
+           << "\n";
+    return 1;
+  }
+
+  return 0;
+}
+
+/// Tool entry point: parses the command line and dispatches to the selected
+/// subcommand. Returns 1 when no known subcommand was selected.
+int main(int argc, char **argv) {
+  // The options must be constructed before the command line is parsed.
+  static CreateSubCommand createSc;
+
+  cl::ParseCommandLineOptions(argc, argv, "MLIR Data File Tool");
+
+  if (createSc.subCommand) {
+    return createSc.run();
+  }
+
+  llvm::errs() << "Unknown subcommand\n";
+  return 1;
+}