Index: lib/asan/asan_internal.h
===================================================================
--- lib/asan/asan_internal.h
+++ lib/asan/asan_internal.h
@@ -102,6 +102,8 @@
 bool PlatformHasDifferentMemcpyAndMemmove();
 # define PLATFORM_HAS_DIFFERENT_MEMCPY_AND_MEMMOVE \
     (PlatformHasDifferentMemcpyAndMemmove())
+#elif SANITIZER_WINDOWS64
+# define PLATFORM_HAS_DIFFERENT_MEMCPY_AND_MEMMOVE false
 #else
 # define PLATFORM_HAS_DIFFERENT_MEMCPY_AND_MEMMOVE true
 #endif  // SANITIZER_MAC
Index: lib/asan/asan_rtl.cc
===================================================================
--- lib/asan/asan_rtl.cc
+++ lib/asan/asan_rtl.cc
@@ -463,6 +463,12 @@
     kMidMemBeg = kLowMemEnd < 0x3000000000ULL ? 0x3000000000ULL : 0;
     kMidMemEnd = kLowMemEnd < 0x3000000000ULL ? 0x4fffffffffULL : 0;
   }
+#elif SANITIZER_WINDOWS64
+  // Disable the "mid mem" shadow layout.
+  if (!full_shadow_is_available) {
+    kMidMemBeg = 0;
+    kMidMemEnd = 0;
+  }
 #endif
 
   if (Verbosity()) PrintAddressSpaceLayout();
Index: lib/interception/interception_win.cc
===================================================================
--- lib/interception/interception_win.cc
+++ lib/interception/interception_win.cc
@@ -43,6 +43,61 @@
   *(ptrdiff_t*)(jmp_from + 1) = offset;
 }
 
+static void WriteJumpInstruction_14I(char *jmp_from, char *to) {
+  // Jump to any address with 14 instruction bytes on x64.
+  // Ref: https://blogs.oracle.com/nike/entry/long_absolute_jumps_on_amd64
+  //
+  //   push DWORD          ; = 68 XX XX XX XX
+  //                       ; The lower 32 bits of a 64-bit address.
+  //                       ; Note that "push" in 64-bit mode sign-extends
+  //                       ; an imm32 to 64-bit.
+  //   mov [rsp+4], DWORD  ; = c7 44 24 04 XX XX XX XX
+  //                       ; Overwrite the higher 32 bits.
+  //   ret                 ; = c3
+  //                       ; The jump makes use of full 64-bit address.
+  //
+  // Example machine codes to jump to 0x8877665544332211 are:
+  // Codes: 68 11 22 33 44 c7 44 24 04 55 66 77 88 c3.
+  //        [68][LL][LL][LL][LL][c7][44][24][04][HH][HH][HH][HH][c3].
+  // Index: -00--01--02--03--04--05--06--07--08--09--10--11--12--13-
+  //
+  // Lower 4 bytes of the full 64-bit address.
+  unsigned long long to_addr = (unsigned long long)to;
+  unsigned int to_addr_lower = (unsigned int)(to_addr & 0xFFFFFFFFull);
+  unsigned int to_addr_higher = (unsigned int)(to_addr >> 32);
+
+  jmp_from[0] = '\x68';  // Opcode for PUSH.
+  *(unsigned int*)(jmp_from + 1) = to_addr_lower;
+  jmp_from[5] = '\xC7';  // Opcodes for mov(4 bytes).
+  jmp_from[6] = '\x44';
+  jmp_from[7] = '\x24';
+  jmp_from[8] = '\x04';
+  // Higher 4 bytes of the full 64-bit address.
+  *(unsigned int*)(jmp_from + 9) = to_addr_higher;
+  jmp_from[13] = '\xC3';  // Opcode for RET.
+}
+
+static void WriteJumpInstruction_6I8D(char *jmp_from, char *to) {
+  // Jump to any address with 6 instruction bytes and 8 bytes of data.
+  // The target address is stored in the 8 bytes just before 'jmp_from'.
+  // A RIP-relative displacement is measured from the end of the 6-byte
+  // instruction, so reaching 'jmp_from - 8' takes -(8 + 6) = -14:
+  //   jmp [rip - 14] = ff 25 f2 ff ff ff
+  jmp_from[0] = '\xFF';
+  jmp_from[1] = '\x25';
+  jmp_from[2] = '\xF2';
+  jmp_from[3] = '\xFF';
+  jmp_from[4] = '\xFF';
+  jmp_from[5] = '\xFF';
+  // FIXME(wwchrome): We are betting on 0xCC paddings, but in practice
+  // it will sometimes stomp on valid bytes, in some cases will cause
+  // access violations. Need to implement a proper way that search
+  // for some room and handle violations.
+  // Put the full address at previous 8 bytes.
+  unsigned long long to_addr = (unsigned long long)to;
+  *(unsigned long long*)(jmp_from - 8) = to_addr;
+}
+
 static char *GetMemoryForTrampoline(size_t size) {
   // Trampolines are allocated from a common pool.
   const int POOL_SIZE = 1024;
@@ -68,11 +123,76 @@
 
 // Returns 0 on error.
 static size_t RoundUpToInstrBoundary(size_t size, char *code) {
-#ifdef _WIN64
-  // TODO(wwchrome): Implement similar logic for x64 instructions.
-  // Win64 RoundUpToInstrBoundary is not supported yet.
-  __debugbreak();
-  return 0;
+#if SANITIZER_WINDOWS64
+  // Win64 RoundUpToInstrBoundary is a work in progress.
+  size_t cursor = 0;
+  while (cursor < size) {
+    switch (code[cursor]) {
+      case '\x57':  // 57 : push rdi
+        cursor++;
+        continue;
+      case '\xb8':  // b8 XX XX XX XX : mov eax, XX XX XX XX
+        cursor += 5;
+        continue;
+    }
+
+    switch (*(unsigned short*)(code + cursor)) {  // NOLINT
+      case 0x5540:  // 40 55 : rex push rbp
+      case 0x5340:  // 40 53 : rex push rbx
+        cursor += 2;
+        continue;
+    }
+
+    switch (0x00FFFFFF & *(unsigned int*)(code + cursor)) {
+      case 0xc18b48:  // 48 8b c1 : mov rax, rcx
+      case 0xc48b48:  // 48 8b c4 : mov rax, rsp
+      case 0xd9f748:  // 48 f7 d9 : neg rcx
+      case 0xd12b48:  // 48 2b d1 : sub rdx, rcx
+      case 0x07c1f6:  // f6 c1 07 : test cl, 0x7
+      case 0xc0854d:  // 4d 85 c0 : test r8, r8
+      case 0xc2b60f:  // 0f b6 c2 : movzx eax, dl
+      case 0xc03345:  // 45 33 c0 : xor r8d, r8d
+      case 0xd98b4c:  // 4c 8b d9 : mov r11, rcx
+      case 0xd28b4c:  // 4c 8b d2 : mov r10, rdx
+      case 0xd2b60f:  // 0f b6 d2 : movzx edx, dl
+      case 0xca2b48:  // 48 2b ca : sub rcx, rdx
+      case 0x10b70f:  // 0f b7 10 : movzx edx, WORD PTR [rax]
+      case 0xc00b4d:  // 4d 0b c0 : or r8, r8
+      case 0xd18b48:  // 48 8b d1 : mov rdx, rcx
+      case 0xdc8b4c:  // 4c 8b dc : mov r11,rsp
+      case 0xd18b4c:  // 4c 8b d1 : mov r10, rcx
+        cursor += 3;
+        continue;
+
+      case 0xec8348:  // 48 83 ec XX : sub rsp, 0xXX
+      case 0xf88349:  // 49 83 f8 XX : cmp r8, XX
+      case 0x588948:  // 48 89 58 XX : mov QWORD PTR[rax + XX], rbx
+        cursor += 4;
+        continue;
+
+      case 0x058b48:  // 48 8b 05 XX XX XX XX
+                      // = mov rax, QWORD PTR [rip+ 0xXXXXXXXX]
+      case 0x25ff48:  // 48 ff 25 XX XX XX XX
+                      // = rex.W jmp QWORD PTR [rip + 0xXXXXXXXX]
+        cursor += 7;
+        continue;
+    }
+
+    // Check first 5 bytes.
+    switch (0xFFFFFFFFFFull & *(unsigned long long*)(code + cursor)) {
+      case 0x08245c8948:  // 48 89 5c 24 08 : mov QWORD PTR [rsp+0x8], rbx
+      case 0x1024748948:  // 48 89 74 24 10 : mov QWORD PTR [rsp+0x10], rsi
+        cursor += 5;
+        continue;
+    }
+
+    // Unknown instruction: stop and report failure (0), matching the
+    // "Returns 0 on error" contract, instead of spinning on the same bytes.
+    __debugbreak();
+    return 0;
+  }
+
+  return cursor;
 #else
   size_t cursor = 0;
   while (cursor < size) {
@@ -150,11 +270,56 @@
 }
 
 bool OverrideFunction(uptr old_func, uptr new_func, uptr *orig_old_func) {
-#ifdef _WIN64
-  // TODO(wwchrome): Implement using x64 jmp.
-  // OverrideFunction is not yet supported on x64.
-  __debugbreak();
-  return false;
+#if SANITIZER_WINDOWS64
+  // Function overriding works basically like this:
+  // We write "jmp [rip - 14]" (6 bytes) at the beginning of the 'old_func'
+  // to override it; the 8-byte jump target is stored right before it.
+  // We might want to be able to execute the original 'old_func' from the
+  // wrapper, in this case we need to keep the leading 6+ bytes ('head')
+  // of the original code somewhere with a "jmp <old_func+head>".
+  // We call these 'head'+14 bytes of instructions a "trampoline".
+  char *old_bytes = (char *)old_func;
+
+  // TODO(wwchrome): Merge with the 32-bit version maybe.
+  const size_t kHeadMin = 6;  // The minimum size of the head.
+  size_t head = kHeadMin;
+  if (orig_old_func) {
+    // Find out the number of bytes of the instructions we need to copy
+    // to the trampoline and store it in 'head'.
+
+    head = RoundUpToInstrBoundary(kHeadMin, old_bytes);
+    if (!head)
+      return false;
+
+    // Put the needed instructions into the trampoline bytes.
+    // Because the 'jmp' at the end of trampoline we can afford
+    // to use 14 bytes and it is simpler.
+    char *trampoline = GetMemoryForTrampoline(head + 14);
+    if (!trampoline)
+      return false;
+    _memcpy(trampoline, old_bytes, head);
+    WriteJumpInstruction_14I(trampoline + head, old_bytes + head);
+    *orig_old_func = (uptr)trampoline;
+  }
+
+  // Now put the "jmp <new_func>" instruction at the original code location.
+  // We should preserve the EXECUTE flag as some of our own code might be
+  // located in the same page (sic!).  FIXME: might consider putting the
+  // __interception code into a separate section or something?
+  DWORD old_prot, unused_prot;
+  // The 8 bytes before 'old_func' hold the jump target; unprotect them too.
+  if (!VirtualProtect((void *)(old_bytes - 8), head + 8,
+                      PAGE_EXECUTE_READWRITE, &old_prot))
+    return false;
+
+  WriteJumpInstruction_6I8D(old_bytes, (char *)new_func);
+  _memset(old_bytes + kHeadMin, 0xCC /* int 3 */, head - kHeadMin);
+
+  // Restore the original permissions.
+  if (!VirtualProtect((void *) (old_bytes - 8), head + 8, old_prot, &unused_prot))  // NOLINT
+    return false;  // not clear if this failure bothers us.
+
+  return true;
 #else
   // Function overriding works basically like this:
   // We write "jmp <new_func>" (5 bytes) at the beginning of the 'old_func'
Index: lib/sanitizer_common/sanitizer_platform.h
===================================================================
--- lib/sanitizer_common/sanitizer_platform.h
+++ lib/sanitizer_common/sanitizer_platform.h
@@ -67,6 +67,12 @@
 # define SANITIZER_WINDOWS 0
 #endif
 
+#if defined(_WIN64)
+# define SANITIZER_WINDOWS64 1
+#else
+# define SANITIZER_WINDOWS64 0
+#endif
+
 #if defined(__ANDROID__)
 # define SANITIZER_ANDROID 1
 #else
Index: lib/sanitizer_common/sanitizer_platform_interceptors.h
===================================================================
--- lib/sanitizer_common/sanitizer_platform_interceptors.h
+++ lib/sanitizer_common/sanitizer_platform_interceptors.h
@@ -83,7 +83,15 @@
 #define SANITIZER_INTERCEPT_MEMMOVE 1
 #define SANITIZER_INTERCEPT_MEMCPY 1
 #define SANITIZER_INTERCEPT_MEMCMP 1
+// TODO(wwchrome): Re-enable intercepting memchr() when ready.
+// The function memchr() contains a jump in the first 6 bytes
+// that is problematic to intercept correctly on Win64.
+// Disable memchr() interception for Win64 temporarily.
+#if SANITIZER_WINDOWS64
+#define SANITIZER_INTERCEPT_MEMCHR 0
+#else
 #define SANITIZER_INTERCEPT_MEMCHR 1
+#endif
 #define SANITIZER_INTERCEPT_MEMRCHR SI_FREEBSD || SI_LINUX
 
 #define SANITIZER_INTERCEPT_READ SI_NOT_WINDOWS