Index: cfe/trunk/lib/CodeGen/CodeGenPGO.cpp =================================================================== --- cfe/trunk/lib/CodeGen/CodeGenPGO.cpp +++ cfe/trunk/lib/CodeGen/CodeGenPGO.cpp @@ -612,7 +612,7 @@ llvm::MD5::MD5Result Result; MD5.final(Result); using namespace llvm::support; - return endian::read(Result); + return Result.low(); } void CodeGenPGO::assignRegionCounters(GlobalDecl GD, llvm::Function *Fn) { Index: cfe/trunk/lib/Frontend/ASTUnit.cpp =================================================================== --- cfe/trunk/lib/Frontend/ASTUnit.cpp +++ cfe/trunk/lib/Frontend/ASTUnit.cpp @@ -1252,7 +1252,7 @@ PreambleFileHash Result; Result.Size = Size; Result.ModTime = ModTime; - memset(Result.MD5, 0, sizeof(Result.MD5)); + Result.MD5 = {}; return Result; } @@ -1273,7 +1273,7 @@ bool operator==(const ASTUnit::PreambleFileHash &LHS, const ASTUnit::PreambleFileHash &RHS) { return LHS.Size == RHS.Size && LHS.ModTime == RHS.ModTime && - memcmp(LHS.MD5, RHS.MD5, sizeof(LHS.MD5)) == 0; + LHS.MD5 == RHS.MD5; } } // namespace clang Index: lld/trunk/COFF/Writer.cpp =================================================================== --- lld/trunk/COFF/Writer.cpp +++ lld/trunk/COFF/Writer.cpp @@ -791,7 +791,7 @@ "only PDB 7.0 is supported"); assert(sizeof(Res) == sizeof(BuildId->DI->PDB70.Signature) && "signature size mismatch"); - memcpy(BuildId->DI->PDB70.Signature, Res, + memcpy(BuildId->DI->PDB70.Signature, Res.Bytes.data(), sizeof(codeview::PDB70DebugInfo::Signature)); // TODO(compnerd) track the Age BuildId->DI->PDB70.Age = 1; Index: lldb/trunk/source/Host/common/FileSystem.cpp =================================================================== --- lldb/trunk/source/Host/common/FileSystem.cpp +++ lldb/trunk/source/Host/common/FileSystem.cpp @@ -67,9 +67,7 @@ if (!CalcMD5(file_spec, offset, length, md5_result)) return false; - const auto uint64_res = reinterpret_cast(md5_result); - high = uint64_res[0]; - low = uint64_res[1]; + std::tie(high, low) = 
md5_result.words(); return true; } Index: lldb/trunk/source/Utility/DataExtractor.cpp =================================================================== --- lldb/trunk/source/Utility/DataExtractor.cpp +++ lldb/trunk/source/Utility/DataExtractor.cpp @@ -1233,6 +1233,6 @@ llvm::MD5::MD5Result result; md5.final(result); - dest.resize(16); - std::copy(result, result + 16, dest.begin()); + dest.clear(); + dest.append(result.Bytes.begin(), result.Bytes.end()); } Index: llvm/trunk/include/llvm/Support/FileSystem.h =================================================================== --- llvm/trunk/include/llvm/Support/FileSystem.h +++ llvm/trunk/include/llvm/Support/FileSystem.h @@ -33,6 +33,7 @@ #include "llvm/Support/Chrono.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/ErrorOr.h" +#include "llvm/Support/MD5.h" #include #include #include @@ -399,6 +400,16 @@ /// platform-specific error_code. std::error_code resize_file(int FD, uint64_t Size); +/// @brief Compute an MD5 hash of a file's contents. +/// +/// @param FD Input file descriptor. +/// @returns An MD5Result with the hash computed, if successful, otherwise a +/// std::error_code. +ErrorOr md5_contents(int FD); + +/// @brief Version of md5_contents that doesn't require an open file descriptor. 
+ErrorOr md5_contents(const Twine &Path); + /// @} /// @name Physical Observers /// @{ Index: llvm/trunk/include/llvm/Support/MD5.h =================================================================== --- llvm/trunk/include/llvm/Support/MD5.h +++ llvm/trunk/include/llvm/Support/MD5.h @@ -52,7 +52,32 @@ MD5_u32plus block[16]; public: - typedef uint8_t MD5Result[16]; + struct MD5Result { + std::array Bytes; + + operator std::array() const { return Bytes; } + + const uint8_t &operator[](size_t I) const { return Bytes[I]; } + uint8_t &operator[](size_t I) { return Bytes[I]; } + + SmallString<32> digest() const; + + uint64_t low() const { + // Our MD5 implementation returns the result in little endian, so the low + // word is first. + using namespace support; + return endian::read(Bytes.data()); + } + + uint64_t high() const { + using namespace support; + return endian::read(Bytes.data() + 8); + } + std::pair words() const { + using namespace support; + return std::make_pair(high(), low()); + } + }; MD5(); @@ -76,6 +101,10 @@ const uint8_t *body(ArrayRef Data); }; +inline bool operator==(const MD5::MD5Result &LHS, const MD5::MD5Result &RHS) { + return LHS.Bytes == RHS.Bytes; +} + /// Helper to compute and return lower 64 bits of the given string's MD5 hash. inline uint64_t MD5Hash(StringRef Str) { using namespace support; @@ -84,9 +113,8 @@ Hash.update(Str); MD5::MD5Result Result; Hash.final(Result); - // Return the least significant 8 bytes. Our MD5 implementation returns the - // result in little endian, so we may need to swap bytes. - return endian::read(Result); + // Return the least significant word. + return Result.low(); } } // end namespace llvm Index: llvm/trunk/lib/CodeGen/AsmPrinter/DIEHash.cpp =================================================================== --- llvm/trunk/lib/CodeGen/AsmPrinter/DIEHash.cpp +++ llvm/trunk/lib/CodeGen/AsmPrinter/DIEHash.cpp @@ -490,9 +490,9 @@ Hash.final(Result); // ... take the least significant 8 bytes and return those. 
Our MD5 - // implementation always returns its results in little endian, swap bytes - // appropriately. - return support::endian::read64le(Result + 8); + // implementation always returns its results in little endian, so we actually + // need the "high" word. + return Result.high(); } /// This is based on the type signature computation given in section 7.27 of the @@ -514,7 +514,7 @@ Hash.final(Result); // ... take the least significant 8 bytes and return those. Our MD5 - // implementation always returns its results in little endian, swap bytes - // appropriately. - return support::endian::read64le(Result + 8); + // implementation always returns its results in little endian, so we actually + // need the "high" word. + return Result.high(); } Index: llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.cpp =================================================================== --- llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -39,7 +39,6 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/Dwarf.h" -#include "llvm/Support/Endian.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FormattedStream.h" #include "llvm/Support/LEB128.h" @@ -1945,11 +1944,11 @@ MD5 Hash; Hash.update(Identifier); // ... take the least significant 8 bytes and return those. Our MD5 - // implementation always returns its results in little endian, swap bytes - // appropriately. + // implementation always returns its results in little endian, so we actually + // need the "high" word. 
MD5::MD5Result Result; Hash.final(Result); - return support::endian::read64le(Result + 8); + return Result.high(); } void DwarfDebug::addDwarfTypeUnitType(DwarfCompileUnit &CU, Index: llvm/trunk/lib/Support/MD5.cpp =================================================================== --- llvm/trunk/lib/Support/MD5.cpp +++ llvm/trunk/lib/Support/MD5.cpp @@ -261,10 +261,16 @@ support::endian::write32le(&Result[12], d); } -void MD5::stringifyResult(MD5Result &Result, SmallString<32> &Str) { +SmallString<32> MD5::MD5Result::digest() const { + SmallString<32> Str; raw_svector_ostream Res(Str); for (int i = 0; i < 16; ++i) - Res << format("%.2x", Result[i]); + Res << format("%.2x", Bytes[i]); + return Str; +} + +void MD5::stringifyResult(MD5Result &Result, SmallString<32> &Str) { + Str = Result.digest(); } std::array MD5::hash(ArrayRef Data) { @@ -273,7 +279,5 @@ MD5::MD5Result Res; Hash.final(Res); - std::array Arr; - memcpy(Arr.data(), Res, sizeof(Res)); - return Arr; + return Res; } Index: llvm/trunk/lib/Support/Path.cpp =================================================================== --- llvm/trunk/lib/Support/Path.cpp +++ llvm/trunk/lib/Support/Path.cpp @@ -11,13 +11,14 @@ // //===----------------------------------------------------------------------===// +#include "llvm/Support/Path.h" +#include "llvm/ADT/ArrayRef.h" #include "llvm/Support/COFF.h" -#include "llvm/Support/MachO.h" #include "llvm/Support/Endian.h" #include "llvm/Support/Errc.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FileSystem.h" -#include "llvm/Support/Path.h" +#include "llvm/Support/MachO.h" #include "llvm/Support/Process.h" #include #include @@ -924,6 +925,36 @@ return std::error_code(); } +ErrorOr md5_contents(int FD) { + MD5 Hash; + + constexpr size_t BufSize = 4096; + std::vector Buf(BufSize); + int BytesRead = 0; + for (;;) { + BytesRead = read(FD, Buf.data(), BufSize); + if (BytesRead <= 0) + break; + Hash.update(makeArrayRef(Buf.data(), BytesRead)); + } + + if 
(BytesRead < 0) + return std::error_code(errno, std::generic_category()); + MD5::MD5Result Result; + Hash.final(Result); + return Result; +} + +ErrorOr md5_contents(const Twine &Path) { + int FD; + if (auto EC = openFileForRead(Path, FD)) + return EC; + + auto Result = md5_contents(FD); + close(FD); + return Result; +} + bool exists(file_status status) { return status_known(status) && status.type() != file_type::file_not_found; } Index: llvm/trunk/unittests/Support/MD5Test.cpp =================================================================== --- llvm/trunk/unittests/Support/MD5Test.cpp +++ llvm/trunk/unittests/Support/MD5Test.cpp @@ -63,8 +63,10 @@ std::array Vec = MD5::hash(Input); MD5::MD5Result MD5Res; SmallString<32> Res; - memcpy(MD5Res, Vec.data(), Vec.size()); + memcpy(MD5Res.Bytes.data(), Vec.data(), Vec.size()); MD5::stringifyResult(MD5Res, Res); EXPECT_EQ(Res, "c3fcd3d76192e4007dfb496cca67e13b"); + EXPECT_EQ(0x3be167ca6c49fb7dULL, MD5Res.high()); + EXPECT_EQ(0x00e49261d7d3fcc3ULL, MD5Res.low()); } } Index: llvm/trunk/unittests/Support/Path.cpp =================================================================== --- llvm/trunk/unittests/Support/Path.cpp +++ llvm/trunk/unittests/Support/Path.cpp @@ -1011,6 +1011,20 @@ ASSERT_NO_ERROR(fs::remove(TempPath)); } +TEST_F(FileSystemTest, MD5) { + int FD; + SmallString<64> TempPath; + ASSERT_NO_ERROR(fs::createTemporaryFile("prefix", "temp", FD, TempPath)); + StringRef Data("abcdefghijklmnopqrstuvwxyz"); + write(FD, Data.data(), Data.size()); + lseek(FD, 0, SEEK_SET); + auto Hash = fs::md5_contents(FD); + ::close(FD); + ASSERT_NO_ERROR(Hash.getError()); + + EXPECT_STREQ("c3fcd3d76192e4007dfb496cca67e13b", Hash->digest().c_str()); +} + TEST_F(FileSystemTest, FileMapping) { // Create a temp file. int FileDescriptor;