diff --git a/libcxx/utils/google-benchmark/README.LLVM b/libcxx/utils/google-benchmark/README.LLVM
--- a/libcxx/utils/google-benchmark/README.LLVM
+++ b/libcxx/utils/google-benchmark/README.LLVM
@@ -14,3 +14,11 @@
   is applied on top of
   https://github.com/google/benchmark/commit/4528c76b718acc9b57956f63069c699ae21edcab
   to fix cross-build from linux to windows via MinGW.
+* https://github.com/google/benchmark/commit/a77d5f70efaebe2b7e8c10134526a23a7ce7ef35
+  and
+  https://github.com/google/benchmark/commit/ecc1685340f58f7fe6b707036bc0bb1fccabb0c1
+  are applied on top of
+  https://github.com/google/benchmark/commit/8e48105d465c586068dd8e248fe75a8971c6ba3a
+  to fix timestamp-related inline asm issues and 32-bit RISC-V build failures.
+  The second cherrypicked commit fixes formatting issues introduced by the
+  preceding change.
diff --git a/libcxx/utils/google-benchmark/src/cycleclock.h b/libcxx/utils/google-benchmark/src/cycleclock.h
--- a/libcxx/utils/google-benchmark/src/cycleclock.h
+++ b/libcxx/utils/google-benchmark/src/cycleclock.h
@@ -84,13 +84,21 @@
   return (high << 32) | low;
 #elif defined(__powerpc__) || defined(__ppc__)
   // This returns a time-base, which is not always precisely a cycle-count.
-  int64_t tbl, tbu0, tbu1;
-  asm("mftbu %0" : "=r"(tbu0));
-  asm("mftb %0" : "=r"(tbl));
-  asm("mftbu %0" : "=r"(tbu1));
-  tbl &= -static_cast<int64_t>(tbu0 == tbu1);
-  // high 32 bits in tbu1; low 32 bits in tbl (tbu0 is garbage)
-  return (tbu1 << 32) | tbl;
+#if defined(__powerpc64__) || defined(__ppc64__)
+  int64_t tb;
+  asm volatile("mfspr %0, 268" : "=r"(tb));
+  return tb;
+#else
+  uint32_t tbl, tbu0, tbu1;
+  asm volatile(
+      "mftbu %0\n"
+      "mftbl %1\n"
+      "mftbu %2"
+      : "=r"(tbu0), "=r"(tbl), "=r"(tbu1));
+  tbl &= -static_cast<int32_t>(tbu0 == tbu1);
+  // high 32 bits in tbu1; low 32 bits in tbl (tbu0 is no longer needed)
+  return (static_cast<uint64_t>(tbu1) << 32) | tbl;
+#endif
 #elif defined(__sparc__)
   int64_t tick;
   asm(".byte 0x83, 0x41, 0x00, 0x00");
@@ -167,16 +175,22 @@
 #elif defined(__riscv) // RISC-V
   // Use RDCYCLE (and RDCYCLEH on riscv32)
 #if __riscv_xlen == 32
-  uint64_t cycles_low, cycles_hi0, cycles_hi1;
-  asm("rdcycleh %0" : "=r"(cycles_hi0));
-  asm("rdcycle %0" : "=r"(cycles_lo));
-  asm("rdcycleh %0" : "=r"(cycles_hi1));
-  // This matches the PowerPC overflow detection, above
-  cycles_lo &= -static_cast<uint64_t>(cycles_hi0 == cycles_hi1);
-  return (cycles_hi1 << 32) | cycles_lo;
+  uint32_t cycles_lo, cycles_hi0, cycles_hi1;
+  // This asm also includes the PowerPC overflow handling strategy, as above.
+  // Implemented in assembly because Clang insisted on branching.
+  asm volatile(
+      "rdcycleh %0\n"
+      "rdcycle %1\n"
+      "rdcycleh %2\n"
+      "sub %0, %0, %2\n"
+      "seqz %0, %0\n"
+      "sub %0, zero, %0\n"
+      "and %1, %1, %0\n"
+      : "=r"(cycles_hi0), "=r"(cycles_lo), "=r"(cycles_hi1));
+  return (static_cast<uint64_t>(cycles_hi1) << 32) | cycles_lo;
 #else
   uint64_t cycles;
-  asm("rdcycle %0" : "=r"(cycles));
+  asm volatile("rdcycle %0" : "=r"(cycles));
   return cycles;
 #endif
 #else
diff --git a/llvm/utils/benchmark/README.LLVM b/llvm/utils/benchmark/README.LLVM
--- a/llvm/utils/benchmark/README.LLVM
+++ b/llvm/utils/benchmark/README.LLVM
@@ -27,3 +27,9 @@
   is applied on top of v1.4.1 to add RISC-V timer support.
 * https://github.com/google/benchmark/commit/8e48105d465c586068dd8e248fe75a8971c6ba3a
   is applied on top of v1.4.1 to fix cross-build from linux to windows via MinGW.
+* https://github.com/google/benchmark/commit/a77d5f70efaebe2b7e8c10134526a23a7ce7ef35
+  and
+  https://github.com/google/benchmark/commit/ecc1685340f58f7fe6b707036bc0bb1fccabb0c1
+  are applied on top of the previous cherrypick to fix timestamp-related inline
+  asm issues and 32-bit RISC-V build failures. The second cherrypicked commit
+  fixes formatting issues introduced by the preceding change.
diff --git a/llvm/utils/benchmark/src/cycleclock.h b/llvm/utils/benchmark/src/cycleclock.h
--- a/llvm/utils/benchmark/src/cycleclock.h
+++ b/llvm/utils/benchmark/src/cycleclock.h
@@ -84,13 +84,21 @@
   return (high << 32) | low;
 #elif defined(__powerpc__) || defined(__ppc__)
   // This returns a time-base, which is not always precisely a cycle-count.
-  int64_t tbl, tbu0, tbu1;
-  asm("mftbu %0" : "=r"(tbu0));
-  asm("mftb %0" : "=r"(tbl));
-  asm("mftbu %0" : "=r"(tbu1));
-  tbl &= -static_cast<int64_t>(tbu0 == tbu1);
-  // high 32 bits in tbu1; low 32 bits in tbl (tbu0 is garbage)
-  return (tbu1 << 32) | tbl;
+#if defined(__powerpc64__) || defined(__ppc64__)
+  int64_t tb;
+  asm volatile("mfspr %0, 268" : "=r"(tb));
+  return tb;
+#else
+  uint32_t tbl, tbu0, tbu1;
+  asm volatile(
+      "mftbu %0\n"
+      "mftbl %1\n"
+      "mftbu %2"
+      : "=r"(tbu0), "=r"(tbl), "=r"(tbu1));
+  tbl &= -static_cast<int32_t>(tbu0 == tbu1);
+  // high 32 bits in tbu1; low 32 bits in tbl (tbu0 is no longer needed)
+  return (static_cast<uint64_t>(tbu1) << 32) | tbl;
+#endif
 #elif defined(__sparc__)
   int64_t tick;
   asm(".byte 0x83, 0x41, 0x00, 0x00");
@@ -167,16 +175,22 @@
 #elif defined(__riscv) // RISC-V
   // Use RDCYCLE (and RDCYCLEH on riscv32)
 #if __riscv_xlen == 32
-  uint64_t cycles_low, cycles_hi0, cycles_hi1;
-  asm("rdcycleh %0" : "=r"(cycles_hi0));
-  asm("rdcycle %0" : "=r"(cycles_lo));
-  asm("rdcycleh %0" : "=r"(cycles_hi1));
-  // This matches the PowerPC overflow detection, above
-  cycles_lo &= -static_cast<uint64_t>(cycles_hi0 == cycles_hi1);
-  return (cycles_hi1 << 32) | cycles_lo;
+  uint32_t cycles_lo, cycles_hi0, cycles_hi1;
+  // This asm also includes the PowerPC overflow handling strategy, as above.
+  // Implemented in assembly because Clang insisted on branching.
+  asm volatile(
+      "rdcycleh %0\n"
+      "rdcycle %1\n"
+      "rdcycleh %2\n"
+      "sub %0, %0, %2\n"
+      "seqz %0, %0\n"
+      "sub %0, zero, %0\n"
+      "and %1, %1, %0\n"
+      : "=r"(cycles_hi0), "=r"(cycles_lo), "=r"(cycles_hi1));
+  return (static_cast<uint64_t>(cycles_hi1) << 32) | cycles_lo;
 #else
   uint64_t cycles;
-  asm("rdcycle %0" : "=r"(cycles));
+  asm volatile("rdcycle %0" : "=r"(cycles));
   return cycles;
 #endif
 #else
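
Not part of the patch: below is a minimal, self-contained C++ sketch of the hi/lo/hi
read-and-mask pattern that both 32-bit code paths above rely on. read_counter_hi(),
read_counter_lo(), and the fake counter are hypothetical stand-ins for the real
instructions (mftbu/mftbl on 32-bit PowerPC, rdcycleh/rdcycle on riscv32), used only
so the sketch compiles and runs anywhere.

#include <cstdint>
#include <cstdio>

// Hypothetical stand-ins for the architecture-specific counter instructions.
// They slice a fake 64-bit counter that advances on every low-half read, so a
// wrap between the two 'hi' reads can be demonstrated on any host.
static uint64_t fake_counter = 0x00000001FFFFFFFFull;
static uint32_t read_counter_hi() { return static_cast<uint32_t>(fake_counter >> 32); }
static uint32_t read_counter_lo() { return static_cast<uint32_t>(fake_counter++); }

static int64_t ReadCounter64() {
  uint32_t hi0 = read_counter_hi();
  uint32_t lo = read_counter_lo();
  uint32_t hi1 = read_counter_hi();
  // If the upper half changed between the two 'hi' reads, the lower half
  // wrapped somewhere in between and cannot be trusted, so force it to zero.
  // (hi0 == hi1) is 1 or 0; negating the cast yields an all-ones or all-zeros
  // mask, which keeps the check branchless.
  lo &= -static_cast<uint32_t>(hi0 == hi1);
  return static_cast<int64_t>((static_cast<uint64_t>(hi1) << 32) | lo);
}

int main() {
  printf("wrap seen:   %#llx\n", static_cast<unsigned long long>(ReadCounter64()));
  printf("normal read: %#llx\n", static_cast<unsigned long long>(ReadCounter64()));
  return 0;
}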