diff --git a/llvm/benchmarks/CMakeLists.txt b/llvm/benchmarks/CMakeLists.txt
--- a/llvm/benchmarks/CMakeLists.txt
+++ b/llvm/benchmarks/CMakeLists.txt
@@ -2,3 +2,5 @@
   Support)
 
 add_benchmark(DummyYAML DummyYAML.cpp)
+
+add_subdirectory(hash-benchmark)
diff --git a/llvm/benchmarks/hash-benchmark/CMakeLists.txt b/llvm/benchmarks/hash-benchmark/CMakeLists.txt
new file mode 100644
--- /dev/null
+++ b/llvm/benchmarks/hash-benchmark/CMakeLists.txt
@@ -0,0 +1,4 @@
+set(LLVM_LINK_COMPONENTS
+  Support)
+
+add_benchmark(hash-benchmark hash-benchmark.cpp)
diff --git a/llvm/benchmarks/hash-benchmark/hash-benchmark.cpp b/llvm/benchmarks/hash-benchmark/hash-benchmark.cpp
new file mode 100644
--- /dev/null
+++ b/llvm/benchmarks/hash-benchmark/hash-benchmark.cpp
@@ -0,0 +1,64 @@
+#include "benchmark/benchmark.h"
+#include "llvm/Support/SHA1.h"
+#include "llvm/Support/xxhash.h"
+#include <cstddef>
+#include <cstdint>
+#include <string>
+using namespace llvm;
+
+// Seed text that getData() repeats to build inputs of arbitrary size.
+static const std::string kData = R"(
+Lorem ipsum dolor sit amet, consectetur adipiscing elit. Donec eget ex varius,
+volutpat augue sit amet, pharetra nisl. Fusce elementum, orci a aliquet posuere,
+risus lectus tincidunt dui, vel mollis ante nibh vitae velit. Quisque ultrices
+ligula augue, eget hendrerit leo aliquam nec. Vestibulum tempus finibus
+ullamcorper. Maecenas suscipit massa risus, eget tristique nunc gravida non.
+Nunc convallis, erat id ultricies finibus, est ipsum aliquet risus, at vulputate
+sapien mi et metus. Nunc consectetur massa eu interdum hendrerit.
+
+Vestibulum id volutpat justo. Nullam nec justo lectus. Vestibulum cursus cursus
+ipsum, id hendrerit leo dapibus ut. Curabitur venenatis ex justo, ut feugiat
+neque consectetur non. Sed vel lobortis augue. Cras pretium a nisl non
+fringilla. Praesent vitae eros nisi. In ultricies augue turpis, a finibus leo
+faucibus quis. Pellentesque euismod leo ut venenatis pharetra. Suspendisse
+potenti.
+
+Quisque scelerisque eu dolor eu auctor. Mauris gravida nunc ac porttitor
+euismod. Integer massa nunc.)";
+
+// Returns exactly Size bytes made of kData repeated and then truncated.
+static std::string getData(size_t Size) {
+  std::string Data;
+  Data.reserve(Size + kData.size());
+  while (Data.size() < Size)
+    Data.append(kData);
+  Data.resize(Size);
+  return Data;
+}
+
+// Times one-shot SHA1::hash over State.range(0) bytes of input.
+static void BM_SHA1(benchmark::State &State) {
+  auto Data = getData(State.range(0));
+  for (auto _ : State) {
+    benchmark::ClobberMemory();
+    const auto Hash = SHA1::hash(
+        {reinterpret_cast<const uint8_t *>(Data.data()), Data.size()});
+    benchmark::DoNotOptimize(Hash);
+  }
+}
+
+// Times xxHash64 over State.range(0) bytes of input.
+static void BM_xxHash64(benchmark::State &State) {
+  auto Data = getData(State.range(0));
+  for (auto _ : State) {
+    benchmark::ClobberMemory();
+    const auto Hash = xxHash64(Data);
+    benchmark::DoNotOptimize(Hash);
+  }
+}
+
+// Sweep input sizes between 1 KiB and 1 MiB.
+BENCHMARK(BM_SHA1)->Range(1 << 10, 1 << 20);
+BENCHMARK(BM_xxHash64)->Range(1 << 10, 1 << 20);
+
+BENCHMARK_MAIN();
diff --git a/llvm/lib/Support/SHA1.cpp b/llvm/lib/Support/SHA1.cpp
--- a/llvm/lib/Support/SHA1.cpp
+++ b/llvm/lib/Support/SHA1.cpp
@@ -16,8 +16,10 @@
 
 #include "llvm/Support/SHA1.h"
 #include "llvm/ADT/ArrayRef.h"
+#include "llvm/Support/Endian.h"
 #include "llvm/Support/Host.h"
 using namespace llvm;
+using namespace support;
 
 #include <stdint.h>
 #include <string.h>
@@ -26,45 +28,45 @@
 #define SHA_BIG_ENDIAN
 #endif
 
-static uint32_t rol(uint32_t Number, int Bits) {
+static inline uint32_t rol(uint32_t Number, int Bits) {
   return (Number << Bits) | (Number >> (32 - Bits));
 }
 
-static uint32_t blk0(uint32_t *Buf, int I) { return Buf[I]; }
+static inline uint32_t blk0(uint32_t *Buf, int I) { return Buf[I]; }
 
-static uint32_t blk(uint32_t *Buf, int I) {
+static inline uint32_t blk(uint32_t *Buf, int I) {
   Buf[I & 15] = rol(Buf[(I + 13) & 15] ^ Buf[(I + 8) & 15] ^ Buf[(I + 2) & 15] ^
                         Buf[I & 15],
                     1);
   return Buf[I & 15];
 }
 
-static void r0(uint32_t &A, uint32_t &B, uint32_t &C, uint32_t &D, uint32_t &E,
-               int I, uint32_t *Buf) {
+static inline void r0(uint32_t &A, uint32_t &B, uint32_t &C, uint32_t &D,
+                      uint32_t &E, int I, uint32_t *Buf) {
   E += ((B & (C ^ D)) ^ D) + blk0(Buf, I) + 0x5A827999 + rol(A, 5);
   B = rol(B, 30);
 }
 
-static void r1(uint32_t &A, uint32_t &B, uint32_t &C, uint32_t &D, uint32_t &E,
-               int I, uint32_t *Buf) {
+static inline void r1(uint32_t &A, uint32_t &B, uint32_t &C, uint32_t &D,
+                      uint32_t &E, int I, uint32_t *Buf) {
   E += ((B & (C ^ D)) ^ D) + blk(Buf, I) + 0x5A827999 + rol(A, 5);
   B = rol(B, 30);
 }
 
-static void r2(uint32_t &A, uint32_t &B, uint32_t &C, uint32_t &D, uint32_t &E,
-               int I, uint32_t *Buf) {
+static inline void r2(uint32_t &A, uint32_t &B, uint32_t &C, uint32_t &D,
+                      uint32_t &E, int I, uint32_t *Buf) {
   E += (B ^ C ^ D) + blk(Buf, I) + 0x6ED9EBA1 + rol(A, 5);
   B = rol(B, 30);
 }
 
-static void r3(uint32_t &A, uint32_t &B, uint32_t &C, uint32_t &D, uint32_t &E,
-               int I, uint32_t *Buf) {
+static inline void r3(uint32_t &A, uint32_t &B, uint32_t &C, uint32_t &D,
+                      uint32_t &E, int I, uint32_t *Buf) {
   E += (((B | C) & D) | (B & C)) + blk(Buf, I) + 0x8F1BBCDC + rol(A, 5);
   B = rol(B, 30);
 }
 
-static void r4(uint32_t &A, uint32_t &B, uint32_t &C, uint32_t &D, uint32_t &E,
-               int I, uint32_t *Buf) {
+static inline void r4(uint32_t &A, uint32_t &B, uint32_t &C, uint32_t &D,
+                      uint32_t &E, int I, uint32_t *Buf) {
   E += (B ^ C ^ D) + blk(Buf, I) + 0xCA62C1D6 + rol(A, 5);
   B = rol(B, 30);
 }
@@ -210,8 +212,34 @@
 }
 
 void SHA1::update(ArrayRef<uint8_t> Data) {
-  for (auto &C : Data)
-    writebyte(C);
+  // Account for the whole input up front, before it is consumed below.
+  InternalState.ByteCount += Data.size();
+
+  // Finish the current block: feed at most the bytes still missing from it.
+  if (InternalState.BufferOffset > 0) {
+    const size_t Remainder = std::min<size_t>(
+        Data.size(), BLOCK_LENGTH - InternalState.BufferOffset);
+    for (size_t I = 0; I < Remainder; ++I)
+      addUncounted(Data[I]);
+    Data = Data.drop_front(Remainder);
+  }
+
+  // Fast buffer filling for large inputs: load each whole block as big-endian
+  // 32-bit words directly into the buffer and hash it.
+  while (Data.size() >= BLOCK_LENGTH) {
+    assert(InternalState.BufferOffset == 0);
+    static_assert(BLOCK_LENGTH % 4 == 0, "BLOCK_LENGTH must be word-aligned");
+    constexpr size_t BLOCK_LENGTH_32 = BLOCK_LENGTH / 4;
+    for (size_t I = 0; I < BLOCK_LENGTH_32; ++I) {
+      InternalState.Buffer.L[I] = endian::read32be(&Data[I * 4]);
+    }
+    hashBlock();
+    Data = Data.drop_front(BLOCK_LENGTH);
+  }
+
+  // Finish the remainder: fewer than BLOCK_LENGTH bytes are left.
+  for (auto const C : Data)
+    addUncounted(C);
 }
 
 void SHA1::pad() {
diff --git a/llvm/unittests/Support/raw_sha1_ostream_test.cpp b/llvm/unittests/Support/raw_sha1_ostream_test.cpp
--- a/llvm/unittests/Support/raw_sha1_ostream_test.cpp
+++ b/llvm/unittests/Support/raw_sha1_ostream_test.cpp
@@ -43,6 +43,22 @@
   ASSERT_EQ("2EF7BDE608CE5404E97D5F042F95F89F1C232871", Hash);
 }
 
+TEST(sha1_hash_test, Update) {
+  SHA1 sha1;
+  std::string Input = "123456789012345678901234567890";
+  ASSERT_EQ(Input.size(), 30UL);
+  // 3 short updates.
+  sha1.update(Input);
+  sha1.update(Input);
+  sha1.update(Input);
+  // Long update that gets into the optimized loop with prefix/suffix.
+  sha1.update(Input + Input + Input + Input);
+  // 18 bytes buffered now.
+
+  std::string Hash = toHex(sha1.final());
+  ASSERT_EQ("3E4A614101AD84985AB0FE54DC12A6D71551E5AE", Hash);
+}
+
 // Check that getting the intermediate hash in the middle of the stream does
 // not invalidate the final result.
 TEST(raw_sha1_ostreamTest, Intermediate) {