diff --git a/llvm/include/llvm/ExecutionEngine/Orc/OrcABISupport.h b/llvm/include/llvm/ExecutionEngine/Orc/OrcABISupport.h --- a/llvm/include/llvm/ExecutionEngine/Orc/OrcABISupport.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/OrcABISupport.h @@ -369,6 +369,46 @@ JITTargetAddress PointersBlockTargetAddress, unsigned NumStubs); }; +// @brief loongarch64 support. +// +// LoongArch 64 supports lazy JITing. +class OrcLoongArch64 { +public: + static constexpr unsigned PointerSize = 8; + static constexpr unsigned TrampolineSize = 16; + static constexpr unsigned StubSize = 16; + static constexpr unsigned StubToPointerMaxDisplacement = 1 << 31; + static constexpr unsigned ResolverCodeSize = 0xc8; + + /// Write the resolver code into the given memory. The user is + /// responsible for allocating the memory and setting permissions. + /// + /// ReentryFnAddr should be the address of a function whose signature matches + /// void* (*)(void *TrampolineAddr, void *ReentryCtxAddr). The ReentryCtxAddr + /// argument of writeResolverCode will be passed as the second argument to + /// the function at ReentryFnAddr. + static void writeResolverCode(char *ResolverWorkingMem, + JITTargetAddress ResolverTargetAddress, + JITTargetAddress ReentryFnAddr, + JITTargetAddress ReentryCtxAddr); + + /// Write the requested number of trampolines into the given memory, + /// which must be big enough to hold 1 pointer, plus NumTrampolines + /// trampolines. + static void writeTrampolines(char *TrampolineBlockWorkingMem, + JITTargetAddress TrampolineBlockTargetAddress, + JITTargetAddress ResolverFnAddr, + unsigned NumTrampolines); + + /// Write NumStubs indirect stubs to working memory at StubsBlockWorkingMem. + /// Stubs will be written as if linked at StubsBlockTargetAddress, with the + /// Nth stub using the Nth pointer in memory starting at + /// PointersBlockTargetAddress. + static void writeIndirectStubsBlock( + char *StubsBlockWorkingMem, JITTargetAddress StubsBlockTargetAddress, + JITTargetAddress PointersBlockTargetAddress, unsigned NumStubs); +}; + } // end namespace orc } // end namespace llvm diff --git a/llvm/lib/ExecutionEngine/Orc/EPCIndirectionUtils.cpp b/llvm/lib/ExecutionEngine/Orc/EPCIndirectionUtils.cpp --- a/llvm/lib/ExecutionEngine/Orc/EPCIndirectionUtils.cpp +++ b/llvm/lib/ExecutionEngine/Orc/EPCIndirectionUtils.cpp @@ -250,6 +250,9 @@ case Triple::x86: return CreateWithABI(EPC); + case Triple::loongarch64: + return CreateWithABI(EPC); + case Triple::mips: return CreateWithABI(EPC); diff --git a/llvm/lib/ExecutionEngine/Orc/IndirectionUtils.cpp b/llvm/lib/ExecutionEngine/Orc/IndirectionUtils.cpp --- a/llvm/lib/ExecutionEngine/Orc/IndirectionUtils.cpp +++ b/llvm/lib/ExecutionEngine/Orc/IndirectionUtils.cpp @@ -137,6 +137,11 @@ return CCMgrT::Create(ES, ErrorHandlerAddress); } + case Triple::loongarch64: { + typedef orc::LocalJITCompileCallbackManager CCMgrT; + return CCMgrT::Create(ES, ErrorHandlerAddress); + } + case Triple::mips: { typedef orc::LocalJITCompileCallbackManager CCMgrT; return CCMgrT::Create(ES, ErrorHandlerAddress); @@ -192,6 +197,12 @@ orc::LocalIndirectStubsManager>(); }; + case Triple::loongarch64: + return []() { + return std::make_unique< + orc::LocalIndirectStubsManager>(); + }; + case Triple::mips: return [](){ return std::make_unique< diff --git a/llvm/lib/ExecutionEngine/Orc/LazyReexports.cpp b/llvm/lib/ExecutionEngine/Orc/LazyReexports.cpp --- a/llvm/lib/ExecutionEngine/Orc/LazyReexports.cpp +++ b/llvm/lib/ExecutionEngine/Orc/LazyReexports.cpp @@ -119,6 +119,10 @@ case Triple::x86: return LocalLazyCallThroughManager::Create(ES, ErrorHandlerAddr); + case Triple::loongarch64: + return LocalLazyCallThroughManager::Create( + ES, ErrorHandlerAddr); + case Triple::mips: return LocalLazyCallThroughManager::Create(ES, ErrorHandlerAddr); diff --git a/llvm/lib/ExecutionEngine/Orc/OrcABISupport.cpp b/llvm/lib/ExecutionEngine/Orc/OrcABISupport.cpp --- a/llvm/lib/ExecutionEngine/Orc/OrcABISupport.cpp +++ b/llvm/lib/ExecutionEngine/Orc/OrcABISupport.cpp @@ -1077,5 +1077,158 @@ } } +void OrcLoongArch64::writeResolverCode(char *ResolverWorkingMem, + JITTargetAddress ResolverTargetAddress, + JITTargetAddress ReentryFnAddr, + JITTargetAddress ReentryCtxAddr) { + + LLVM_DEBUG({ + dbgs() << "Writing resolver code to " + << formatv("{0:x16}", ResolverTargetAddress) << "\n"; + }); + + const uint32_t ResolverCode[] = { + 0x02fde063, // 0x0: addi.d $sp, $sp, -136(0xf78) + 0x29c00061, // 0x4: st.d $ra, $sp, 0 + 0x29c02064, // 0x8: st.d $a0, $sp, 8(0x8) + 0x29c04065, // 0xc: st.d $a1, $sp, 16(0x10) + 0x29c06066, // 0x10: st.d $a2, $sp, 24(0x18) + 0x29c08067, // 0x14: st.d $a3, $sp, 32(0x20) + 0x29c0a068, // 0x18: st.d $a4, $sp, 40(0x28) + 0x29c0c069, // 0x1c: st.d $a5, $sp, 48(0x30) + 0x29c0e06a, // 0x20: st.d $a6, $sp, 56(0x38) + 0x29c1006b, // 0x24: st.d $a7, $sp, 64(0x40) + 0x2bc12060, // 0x28: fst.d $fa0, $sp, 72(0x48) + 0x2bc14061, // 0x2c: fst.d $fa1, $sp, 80(0x50) + 0x2bc16062, // 0x30: fst.d $fa2, $sp, 88(0x58) + 0x2bc18063, // 0x34: fst.d $fa3, $sp, 96(0x60) + 0x2bc1a064, // 0x38: fst.d $fa4, $sp, 104(0x68) + 0x2bc1c065, // 0x3c: fst.d $fa5, $sp, 112(0x70) + 0x2bc1e066, // 0x40: fst.d $fa6, $sp, 120(0x78) + 0x2bc20067, // 0x44: fst.d $fa7, $sp, 128(0x80) + 0x1c000004, // 0x48: pcaddu12i $a0, 0 + 0x28c1c084, // 0x4c: ld.d $a0, $a0, 112(0x70) + 0x001501a5, // 0x50: move $a1, $t1 + 0x02ffd0a5, // 0x54: addi.d $a1, $a1, -12(0xff4) + 0x1c000006, // 0x58: pcaddu12i $a2, 0 + 0x28c1a0c6, // 0x5c: ld.d $a2, $a2, 104(0x68) + 0x4c0000c1, // 0x60: jirl $ra, $a2, 0 + 0x0015008c, // 0x64: move $t0, $a0 + 0x2b820067, // 0x68: fld.d $fa7, $sp, 128(0x80) + 0x2b81e066, // 0x6c: fld.d $fa6, $sp, 120(0x78) + 0x2b81c065, // 0x70: fld.d $fa5, $sp, 112(0x70) + 0x2b81a064, // 0x74: fld.d $fa4, $sp, 104(0x68) + 0x2b818063, // 0x78: fld.d $fa3, $sp, 96(0x60) + 0x2b816062, // 0x7c: fld.d $fa2, $sp, 88(0x58) + 0x2b814061, // 0x80: fld.d $fa1, $sp, 80(0x50) + 0x2b812060, // 0x84: fld.d $fa0, $sp, 72(0x48) + 0x28c1006b, // 0x88: ld.d $a7, $sp, 64(0x40) + 0x28c0e06a, // 0x8c: ld.d $a6, $sp, 56(0x38) + 0x28c0c069, // 0x90: ld.d $a5, $sp, 48(0x30) + 0x28c0a068, // 0x94: ld.d $a4, $sp, 40(0x28) + 0x28c08067, // 0x98: ld.d $a3, $sp, 32(0x20) + 0x28c06066, // 0x9c: ld.d $a2, $sp, 24(0x18) + 0x28c04065, // 0xa0: ld.d $a1, $sp, 16(0x10) + 0x28c02064, // 0xa4: ld.d $a0, $sp, 8(0x8) + 0x28c00061, // 0xa8: ld.d $ra, $sp, 0 + 0x02c22063, // 0xac: addi.d $sp, $sp, 136(0x88) + 0x4c000180, // 0xb0: jr $t0 + 0x00000000, // 0xb4: padding to align at 8 bytes + 0x01234567, // 0xb8: Lreentry_ctx_ptr: + 0xdeedbeef, // 0xbc: .dword 0 + 0x98765432, // 0xc0: Lreentry_fn_ptr: + 0xcafef00d, // 0xc4: .dword 0 + }; + + const unsigned ReentryCtxAddrOffset = 0xb8; + const unsigned ReentryFnAddrOffset = 0xc0; + + memcpy(ResolverWorkingMem, ResolverCode, sizeof(ResolverCode)); + memcpy(ResolverWorkingMem + ReentryFnAddrOffset, &ReentryFnAddr, + sizeof(uint64_t)); + memcpy(ResolverWorkingMem + ReentryCtxAddrOffset, &ReentryCtxAddr, + sizeof(uint64_t)); +} + +void OrcLoongArch64::writeTrampolines( + char *TrampolineBlockWorkingMem, + JITTargetAddress TrampolineBlockTargetAddress, + JITTargetAddress ResolverAddr, unsigned NumTrampolines) { + + LLVM_DEBUG({ + dbgs() << "Writing trampoline code to " + << formatv("{0:x16}", TrampolineBlockTargetAddress) << "\n"; + }); + + unsigned OffsetToPtr = alignTo(NumTrampolines * TrampolineSize, 8); + + memcpy(TrampolineBlockWorkingMem + OffsetToPtr, &ResolverAddr, + sizeof(uint64_t)); + + uint32_t *Trampolines = + reinterpret_cast(TrampolineBlockWorkingMem); + for (unsigned I = 0; I < NumTrampolines; ++I, OffsetToPtr -= TrampolineSize) { + uint32_t Hi20 = (OffsetToPtr + 0x800) & 0xfffff000; + uint32_t Lo12 = OffsetToPtr - Hi20; + Trampolines[4 * I + 0] = + 0x1c00000c | + (((Hi20 >> 12) & 0xfffff) << 5); // pcaddu12i $t0, %pc_hi20(Lptr) + Trampolines[4 * I + 1] = + 0x28c0018c | ((Lo12 & 0xfff) << 10); // ld.d $t0, $t0, %pc_lo12(Lptr) + Trampolines[4 * I + 2] = 0x4c00018d; // jirl $t1, $t0, 0 + Trampolines[4 * I + 3] = 0x0; // padding + } +} + +void OrcLoongArch64::writeIndirectStubsBlock( + char *StubsBlockWorkingMem, JITTargetAddress StubsBlockTargetAddress, + JITTargetAddress PointersBlockTargetAddress, unsigned NumStubs) { + // Stub format is: + // + // .section __orc_stubs + // stub1: + // pcaddu12i $t0, %pc_hi20(ptr1) ; PC-rel load of ptr1 + // ld.d $t0, $t0, %pc_lo12(ptr1) + // jr $t0 ; Jump to resolver + // .dword 0 ; Pad to 16 bytes + // stub2: + // pcaddu12i $t0, %pc_hi20(ptr2) ; PC-rel load of ptr2 + // ld.d $t0, $t0, %pc_lo12(ptr2) + // jr $t0 ; Jump to resolver + // .dword 0 ; Pad to 16 bytes + // ... + // + // .section __orc_ptrs + // ptr1: + // .dword 0x0 + // ptr2: + // .dword 0x0 + // ... + LLVM_DEBUG({ + dbgs() << "Writing stubs code to " + << formatv("{0:x16}", StubsBlockTargetAddress) << "\n"; + }); + assert(stubAndPointerRangesOk( + StubsBlockTargetAddress, PointersBlockTargetAddress, NumStubs) && + "PointersBlock is out of range"); + + uint32_t *Stub = reinterpret_cast(StubsBlockWorkingMem); + + for (unsigned I = 0; I < NumStubs; ++I) { + uint64_t PtrDisplacement = + PointersBlockTargetAddress - StubsBlockTargetAddress; + uint32_t Hi20 = (PtrDisplacement + 0x800) & 0xfffff000; + uint32_t Lo12 = PtrDisplacement - Hi20; + Stub[4 * I + 0] = 0x1c00000c | (((Hi20 >> 12) & 0xfffff) + << 5); // pcaddu12i $t0, %pc_hi20(Lptr) + Stub[4 * I + 1] = + 0x28c0018c | ((Lo12 & 0xfff) << 10); // ld.d $t0, $t0, %pc_lo12(Lptr) + Stub[4 * I + 2] = 0x4c000180; // jr $t0 + Stub[4 * I + 3] = 0x0; // padding + PointersBlockTargetAddress += PointerSize; + StubsBlockTargetAddress += StubSize; + } +} + } // End namespace orc. } // End namespace llvm.