diff --git a/compiler-rt/lib/builtins/clear_cache.c b/compiler-rt/lib/builtins/clear_cache.c
--- a/compiler-rt/lib/builtins/clear_cache.c
+++ b/compiler-rt/lib/builtins/clear_cache.c
@@ -91,12 +91,36 @@
 #else
   compilerrt_abort();
 #endif
-#elif defined(__linux__) && defined(__mips__)
+#elif defined(__mips__)
   const uintptr_t start_int = (uintptr_t)start;
   const uintptr_t end_int = (uintptr_t)end;
-  syscall(__NR_cacheflush, start, (end_int - start_int), BCACHE);
-#elif defined(__mips__) && defined(__OpenBSD__)
-  cacheflush(start, (uintptr_t)end - (uintptr_t)start, BCACHE);
+  uintptr_t synci_step;
+  // SYNCI_Step is read from hardware register $1; a value of zero skips the
+  // flush entirely.
+  __asm__ __volatile__("rdhwr %0, $1\n\t" : "=r"(synci_step));
+  if (synci_step != 0) {
+#if __mips_isa_rev >= 6
+    // Start on a synci_step boundary so that the line holding the last bytes
+    // of the range is not skipped when start is unaligned.
+    for (uintptr_t p = start_int - (start_int % synci_step); p < end_int;
+         p += synci_step) {
+      __asm__ __volatile__("synci 0(%0)\n\t" : : "r"(p));
+    }
+    // Issue the barrier as a single asm statement so the compiler cannot
+    // place anything between addiupc and its target 12 bytes later. The
+    // trailing "move $at, $0" is the jr.hb target, not its delay slot.
+    __asm__ __volatile__(".set noat\n\t"
+                         "sync\n\t"
+                         "addiupc $at, 12\n\t"
+                         "jr.hb $at\n\t"
+                         "move $at, $0\n\t"
+                         ".set at");
+#else
+    // Pre-R6 may not be globalized, and some implementations may report a
+    // strange SYNCI_Step, so use the libc call instead.
+    cacheflush(start, end_int - start_int, BCACHE);
+#endif
+  }
 #elif defined(__aarch64__) && !defined(__APPLE__)
   uint64_t xstart = (uint64_t)(uintptr_t)start;
   uint64_t xend = (uint64_t)(uintptr_t)end;