diff --git a/llvm/include/llvm/ExecutionEngine/Orc/OrcABISupport.h b/llvm/include/llvm/ExecutionEngine/Orc/OrcABISupport.h
--- a/llvm/include/llvm/ExecutionEngine/Orc/OrcABISupport.h
+++ b/llvm/include/llvm/ExecutionEngine/Orc/OrcABISupport.h
@@ -330,6 +330,45 @@
       JITTargetAddress PointersBlockTargetAddress, unsigned NumStubs);
 };
 
+// @brief riscv64 support.
+//
+// RISC-V 64 supports lazy JITing.
+class OrcRiscv64 {
+public:
+  static constexpr unsigned PointerSize = 8;
+  static constexpr unsigned TrampolineSize = 16;
+  static constexpr unsigned StubSize = 16;
+  static constexpr unsigned StubToPointerMaxDisplacement = 1 << 31;
+  static constexpr unsigned ResolverCodeSize = 0x148;
+
+  /// Write the resolver code into the given memory. The user is
+  /// responsible for allocating the memory and setting permissions.
+  ///
+  /// ReentryFnAddr should be the address of a function whose signature matches
+  /// void* (*)(void *ReentryCtxAddr, void *TrampolineAddr). The ReentryCtxAddr
+  /// argument of writeResolverCode will be passed as the first argument to
+  /// the function at ReentryFnAddr.
+  static void writeResolverCode(char *ResolverWorkingMem,
+                                JITTargetAddress ResolverTargetAddress,
+                                JITTargetAddress ReentryFnAddr,
+                                JITTargetAddress ReentryCtxAddr);
+
+  /// Write the requested number of trampolines into the given memory,
+  /// which must be big enough to hold 1 pointer, plus NumTrampolines
+  /// trampolines.
+  static void writeTrampolines(char *TrampolineBlockWorkingMem,
+                               JITTargetAddress TrampolineBlockTargetAddress,
+                               JITTargetAddress ResolverFnAddr,
+                               unsigned NumTrampolines);
+  /// Write NumStubs indirect stubs to working memory at StubsBlockWorkingMem.
+  /// Stubs will be written as if linked at StubsBlockTargetAddress, with the
+  /// Nth stub using the Nth pointer in memory starting at
+  /// PointersBlockTargetAddress.
+  static void writeIndirectStubsBlock(
+      char *StubsBlockWorkingMem, JITTargetAddress StubsBlockTargetAddress,
+      JITTargetAddress PointersBlockTargetAddress, unsigned NumStubs);
+};
+
 } // end namespace orc
 } // end namespace llvm
 
diff --git a/llvm/lib/ExecutionEngine/Orc/EPCIndirectionUtils.cpp b/llvm/lib/ExecutionEngine/Orc/EPCIndirectionUtils.cpp
--- a/llvm/lib/ExecutionEngine/Orc/EPCIndirectionUtils.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/EPCIndirectionUtils.cpp
@@ -261,6 +261,9 @@
   case Triple::mips64el:
     return CreateWithABI<OrcMips64>(EPC);
 
+  case Triple::riscv64:
+    return CreateWithABI<OrcRiscv64>(EPC);
+
   case Triple::x86_64:
     if (TT.getOS() == Triple::OSType::Win32)
       return CreateWithABI<OrcX86_64_Win32>(EPC);
diff --git a/llvm/lib/ExecutionEngine/Orc/IndirectionUtils.cpp b/llvm/lib/ExecutionEngine/Orc/IndirectionUtils.cpp
--- a/llvm/lib/ExecutionEngine/Orc/IndirectionUtils.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/IndirectionUtils.cpp
@@ -152,6 +152,11 @@
       return CCMgrT::Create(ES, ErrorHandlerAddress);
     }
 
+    case Triple::riscv64: {
+      typedef orc::LocalJITCompileCallbackManager<orc::OrcRiscv64> CCMgrT;
+      return CCMgrT::Create(ES, ErrorHandlerAddress);
+    }
+
     case Triple::x86_64: {
       if (T.getOS() == Triple::OSType::Win32) {
         typedef orc::LocalJITCompileCallbackManager<orc::OrcX86_64_Win32> CCMgrT;
@@ -206,6 +211,12 @@
                       orc::LocalIndirectStubsManager<orc::OrcMips64>>();
       };
 
+    case Triple::riscv64:
+      return []() {
+        return std::make_unique<
+            orc::LocalIndirectStubsManager<orc::OrcRiscv64>>();
+      };
+
     case Triple::x86_64:
       if (T.getOS() == Triple::OSType::Win32) {
         return [](){
diff --git a/llvm/lib/ExecutionEngine/Orc/LazyReexports.cpp b/llvm/lib/ExecutionEngine/Orc/LazyReexports.cpp
--- a/llvm/lib/ExecutionEngine/Orc/LazyReexports.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/LazyReexports.cpp
@@ -131,6 +131,10 @@
   case Triple::mips64el:
     return LocalLazyCallThroughManager::Create<OrcMips64>(ES, ErrorHandlerAddr);
 
+  case Triple::riscv64:
+    return LocalLazyCallThroughManager::Create<OrcRiscv64>(ES,
+                                                           ErrorHandlerAddr);
+
   case Triple::x86_64:
     if (T.getOS() == Triple::OSType::Win32)
       return LocalLazyCallThroughManager::Create<OrcX86_64_Win32>(
diff --git a/llvm/lib/ExecutionEngine/Orc/OrcABISupport.cpp b/llvm/lib/ExecutionEngine/Orc/OrcABISupport.cpp
--- a/llvm/lib/ExecutionEngine/Orc/OrcABISupport.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/OrcABISupport.cpp
@@ -906,5 +906,189 @@
     Stub[8 * I + 7] = 0x00000000; // nop
   }
 }
+
+void OrcRiscv64::writeResolverCode(char *ResolverWorkingMem,
+                                   JITTargetAddress ResolverTargetAddress,
+                                   JITTargetAddress ReentryFnAddr,
+                                   JITTargetAddress ReentryCtxAddr) {
+
+  // The resolver spills s0-s11, ra, a0-a7 and fs0-fs11, calls
+  // ReentryFn(ReentryCtx, TrampolineAddr), reloads the spilled registers and
+  // jumps to the address that the reentry function returned in a0.
+  // NOTE(review): the 264-byte frame is not a multiple of 16 and fa0-fa7 are
+  // not spilled; confirm both against the RISC-V psABI (stack alignment at
+  // calls, floating-point argument registers).
+  const uint32_t ResolverCode[] = {
+      0xef810113, // 0x00: addi sp,sp,-264
+      0x00813023, // 0x04: sd s0,0(sp)
+      0x00913423, // 0x08: sd s1,8(sp)
+      0x01213823, // 0x0c: sd s2,16(sp)
+      0x01313c23, // 0x10: sd s3,24(sp)
+      0x03413023, // 0x14: sd s4,32(sp)
+      0x03513423, // 0x18: sd s5,40(sp)
+      0x03613823, // 0x1c: sd s6,48(sp)
+      0x03713c23, // 0x20: sd s7,56(sp)
+      0x05813023, // 0x24: sd s8,64(sp)
+      0x05913423, // 0x28: sd s9,72(sp)
+      0x05a13823, // 0x2c: sd s10,80(sp)
+      0x05b13c23, // 0x30: sd s11,88(sp)
+      0x06113023, // 0x34: sd ra,96(sp)
+      0x06a13423, // 0x38: sd a0,104(sp)
+      0x06b13823, // 0x3c: sd a1,112(sp)
+      0x06c13c23, // 0x40: sd a2,120(sp)
+      0x08d13023, // 0x44: sd a3,128(sp)
+      0x08e13423, // 0x48: sd a4,136(sp)
+      0x08f13823, // 0x4c: sd a5,144(sp)
+      0x09013c23, // 0x50: sd a6,152(sp)
+      0x0b113023, // 0x54: sd a7,160(sp)
+      0x0a813427, // 0x58: fsd fs0,168(sp)
+      0x0a913827, // 0x5c: fsd fs1,176(sp)
+      0x0b213c27, // 0x60: fsd fs2,184(sp)
+      0x0d313027, // 0x64: fsd fs3,192(sp)
+      0x0d413427, // 0x68: fsd fs4,200(sp)
+      0x0d513827, // 0x6c: fsd fs5,208(sp)
+      0x0d613c27, // 0x70: fsd fs6,216(sp)
+      0x0f713027, // 0x74: fsd fs7,224(sp)
+      0x0f813427, // 0x78: fsd fs8,232(sp)
+      0x0f913827, // 0x7c: fsd fs9,240(sp)
+      0x0fa13c27, // 0x80: fsd fs10,248(sp)
+      0x11b13027, // 0x84: fsd fs11,256(sp)
+      0x00000517, // 0x88: auipc a0,0x0
+      0x0b053503, // 0x8c: ld a0,176(a0) # 0x138
+      0x00030593, // 0x90: mv a1,t1
+      0xff458593, // 0x94: addi a1,a1,-12
+      0x00000617, // 0x98: auipc a2,0x0
+      0x0a863603, // 0x9c: ld a2,168(a2) # 0x140
+      0x000600e7, // 0xa0: jalr a2
+      0x00050293, // 0xa4: mv t0,a0
+      0x00013403, // 0xa8: ld s0,0(sp)
+      0x00813483, // 0xac: ld s1,8(sp)
+      0x01013903, // 0xb0: ld s2,16(sp)
+      0x01813983, // 0xb4: ld s3,24(sp)
+      0x02013a03, // 0xb8: ld s4,32(sp)
+      0x02813a83, // 0xbc: ld s5,40(sp)
+      0x03013b03, // 0xc0: ld s6,48(sp)
+      0x03813b83, // 0xc4: ld s7,56(sp)
+      0x04013c03, // 0xc8: ld s8,64(sp)
+      0x04813c83, // 0xcc: ld s9,72(sp)
+      0x05013d03, // 0xd0: ld s10,80(sp)
+      0x05813d83, // 0xd4: ld s11,88(sp)
+      0x06013083, // 0xd8: ld ra,96(sp)
+      0x06813503, // 0xdc: ld a0,104(sp)
+      0x07013583, // 0xe0: ld a1,112(sp)
+      0x07813603, // 0xe4: ld a2,120(sp)
+      0x08013683, // 0xe8: ld a3,128(sp)
+      0x08813703, // 0xec: ld a4,136(sp)
+      0x09013783, // 0xf0: ld a5,144(sp)
+      0x09813803, // 0xf4: ld a6,152(sp)
+      0x0a013883, // 0xf8: ld a7,160(sp)
+      0x0a813407, // 0xfc: fld fs0,168(sp)
+      0x0b013487, // 0x100: fld fs1,176(sp)
+      0x0b813907, // 0x104: fld fs2,184(sp)
+      0x0c013987, // 0x108: fld fs3,192(sp)
+      0x0c813a07, // 0x10c: fld fs4,200(sp)
+      0x0d013a87, // 0x110: fld fs5,208(sp)
+      0x0d813b07, // 0x114: fld fs6,216(sp)
+      0x0e013b87, // 0x118: fld fs7,224(sp)
+      0x0e813c07, // 0x11c: fld fs8,232(sp)
+      0x0f013c87, // 0x120: fld fs9,240(sp)
+      0x0f813d07, // 0x124: fld fs10,248(sp)
+      0x10013d87, // 0x128: fld fs11,256(sp)
+      0x10810113, // 0x12c: addi sp,sp,264
+      0x00028067, // 0x130: jr t0
+      0x12345678, // 0x134: padding to align at 8 byte
+      0x12345678, // 0x138: Lreentry_ctx_ptr:
+      0xdeadbeef, // 0x13c:      .quad 0
+      0x98765432, // 0x140: Lreentry_fn_ptr:
+      0xcafef00d  // 0x144:      .quad 0
+  };
+
+  const unsigned ReentryCtxAddrOffset = 0x138;
+  const unsigned ReentryFnAddrOffset = 0x140;
+
+  memcpy(ResolverWorkingMem, ResolverCode, sizeof(ResolverCode));
+  // Overwrite the two placeholder literals with the real addresses.
+  memcpy(ResolverWorkingMem + ReentryFnAddrOffset, &ReentryFnAddr,
+         sizeof(uint64_t));
+  memcpy(ResolverWorkingMem + ReentryCtxAddrOffset, &ReentryCtxAddr,
+         sizeof(uint64_t));
+}
+
+void OrcRiscv64::writeTrampolines(char *TrampolineBlockWorkingMem,
+                                  JITTargetAddress TrampolineBlockTargetAddress,
+                                  JITTargetAddress ResolverAddr,
+                                  unsigned NumTrampolines) {
+
+  // The resolver address is stored right after the last trampoline;
+  // OffsetToPtr is its distance from the trampoline currently being written.
+  unsigned OffsetToPtr = alignTo(NumTrampolines * TrampolineSize, 8);
+
+  memcpy(TrampolineBlockWorkingMem + OffsetToPtr, &ResolverAddr,
+         sizeof(uint64_t));
+
+  uint32_t *Trampolines =
+      reinterpret_cast<uint32_t *>(TrampolineBlockWorkingMem);
+  for (unsigned I = 0; I < NumTrampolines; ++I, OffsetToPtr -= TrampolineSize) {
+    // Split the displacement for auipc + ld. The ld immediate is
+    // sign-extended, so the upper part is rounded by adding 0x800.
+    uint32_t Hi20 = (OffsetToPtr + 0x800) & 0xFFFFF000;
+    uint32_t Lo12 = OffsetToPtr - Hi20;
+    Trampolines[4 * I + 0] = 0x00000297 | Hi20; // auipc t0, %hi(Lptr)
+    Trampolines[4 * I + 1] =
+        0x0002b283 | ((Lo12 & 0xFFF) << 20);    // ld t0, %lo(Lptr)
+    Trampolines[4 * I + 2] = 0x00028367;        // jalr t1, t0
+    Trampolines[4 * I + 3] = 0xdeadface;        // padding
+  }
+}
+
+void OrcRiscv64::writeIndirectStubsBlock(
+    char *StubsBlockWorkingMem, JITTargetAddress StubsBlockTargetAddress,
+    JITTargetAddress PointersBlockTargetAddress, unsigned NumStubs) {
+  // Stub format is:
+  //
+  // .section __orc_stubs
+  // stub1:
+  //        auipc t0, %hi(ptr1)     ; PC-rel load of ptr1
+  //        ld    t0, %lo(ptr1)(t0)
+  //        jr    t0                ; Jump to the address held in ptr1
+  //        .word 0xfeedbeef        ; Pad to 16 bytes
+  // stub2:
+  //        auipc t0, %hi(ptr2)     ; PC-rel load of ptr2
+  //        ld    t0, %lo(ptr2)(t0)
+  //        jr    t0                ; Jump to the address held in ptr2
+  //        .word 0xfeedbeef        ; Pad to 16 bytes
+  //
+  // ...
+  //
+  // .section __orc_ptrs
+  // ptr1:
+  //        .quad 0x0
+  // ptr2:
+  //        .quad 0x0
+  //
+  // ...
+
+  assert(stubAndPointerRangesOk<OrcRiscv64>(
+             StubsBlockTargetAddress, PointersBlockTargetAddress, NumStubs) &&
+         "PointersBlock is out of range");
+
+  uint32_t *Stub = reinterpret_cast<uint32_t *>(StubsBlockWorkingMem);
+
+  for (unsigned I = 0; I < NumStubs; ++I) {
+    // Displacement from this stub to its pointer, split for auipc + ld; the
+    // sign-extended ld immediate requires rounding the upper part by 0x800.
+    uint64_t PtrDisplacement =
+        PointersBlockTargetAddress - StubsBlockTargetAddress;
+    uint32_t Hi20 = (PtrDisplacement + 0x800) & 0xFFFFF000;
+    uint32_t Lo12 = PtrDisplacement - Hi20;
+    Stub[4 * I + 0] = 0x00000297 | Hi20;                   // auipc t0, %hi(Lptr)
+    Stub[4 * I + 1] = 0x0002b283 | ((Lo12 & 0xFFF) << 20); // ld t0, %lo(Lptr)
+    Stub[4 * I + 2] = 0x00028067;                          // jr t0
+    Stub[4 * I + 3] = 0xfeedbeef;                          // padding
+    PointersBlockTargetAddress += PointerSize;
+    StubsBlockTargetAddress += StubSize;
+  }
+}
+
 } // End namespace orc.
 } // End namespace llvm.