# Delay-load thunk tail merging for the COFF linker (DLL.h / DLL.cpp + tests).
#
# Each per-import delay-load thunk is reduced to a short stub that loads the
# address of its __imp_ slot into a register (rax / eax / ip / x17) and jumps
# to a tail-merge chunk. One tail-merge chunk is created per delay-loaded DLL
# directory; it saves the argument registers, calls __delayLoadHelper2 with
# the slot address and the DLL's delay-import descriptor, restores the
# registers and jumps to the address the helper returned. The lit-test
# expectations are updated for the new code layout.
#
# NOTE(review): this copy of the patch had lost its line breaks and every
# angle-bracketed token. Line structure was rebuilt from the hunk headers.
# Template arguments (make<...>, std::vector<...>) and the <FUNCNAME>, <lib>,
# <DLLNAME> comment placeholders were reconstructed -- confirm them against
# the tree before applying.
# NOTE(review): column alignment inside the test expectation lines could not
# be recovered and is reproduced single-spaced; apply with whitespace-tolerant
# matching (e.g. `patch -l`) or re-align against the checked-in tests.
Index: COFF/DLL.h
===================================================================
--- COFF/DLL.h
+++ COFF/DLL.h
@@ -49,7 +49,8 @@
   uint64_t getDirSize();
 
 private:
-  Chunk *newThunkChunk(DefinedImportData *S, Chunk *Dir);
+  Chunk *newThunkChunk(DefinedImportData *S, Chunk *TailMerge);
+  Chunk *newTailMergeChunk(Chunk *Dir);
 
   Defined *Helper;
   std::vector<DefinedImportData *> Imports;
Index: COFF/DLL.cpp
===================================================================
--- COFF/DLL.cpp
+++ COFF/DLL.cpp
@@ -191,6 +191,11 @@
 // which then overwrites its jump table slot with the result
 // for subsequent function calls.
 static const uint8_t ThunkX64[] = {
+    0x48, 0x8D, 0x05, 0, 0, 0, 0,       // lea rax, [__imp_<FUNCNAME>]
+    0xE9, 0, 0, 0, 0,                   // jmp __tailMerge_<lib>
+};
+
+static const uint8_t TailMergeX64[] = {
     0x51,                               // push rcx
     0x52,                               // push rdx
     0x41, 0x50,                         // push r8
@@ -200,7 +205,7 @@
     0x66, 0x0F, 0x7F, 0x4C, 0x24, 0x10, // movdqa xmmword ptr [rsp+10h], xmm1
     0x66, 0x0F, 0x7F, 0x54, 0x24, 0x20, // movdqa xmmword ptr [rsp+20h], xmm2
     0x66, 0x0F, 0x7F, 0x5C, 0x24, 0x30, // movdqa xmmword ptr [rsp+30h], xmm3
-    0x48, 0x8D, 0x15, 0, 0, 0, 0,       // lea rdx, [__imp_<FUNCNAME>]
+    0x48, 0x8B, 0xD0,                   // mov rdx, rax
     0x48, 0x8D, 0x0D, 0, 0, 0, 0,       // lea rcx, [___DELAY_IMPORT_...]
     0xE8, 0, 0, 0, 0,                   // call __delayLoadHelper2
     0x66, 0x0F, 0x6F, 0x04, 0x24,       // movdqa xmm0, xmmword ptr [rsp]
@@ -216,9 +221,14 @@
 };
 
 static const uint8_t ThunkX86[] = {
+    0xB8, 0, 0, 0, 0,  // mov eax, offset ___imp__<FUNCNAME>
+    0xE9, 0, 0, 0, 0,  // jmp __tailMerge_<lib>
+};
+
+static const uint8_t TailMergeX86[] = {
     0x51,              // push ecx
     0x52,              // push edx
-    0x68, 0, 0, 0, 0,  // push offset ___imp__<FUNCNAME>
+    0x50,              // push eax
     0x68, 0, 0, 0, 0,  // push offset ___DELAY_IMPORT_DESCRIPTOR_<DLLNAME>_dll
     0xE8, 0, 0, 0, 0,  // call ___delayLoadHelper2@8
     0x5A,              // pop edx
@@ -229,6 +239,10 @@
 static const uint8_t ThunkARM[] = {
     0x40, 0xf2, 0x00, 0x0c, // mov.w ip, #0 __imp_<FUNCNAME>
     0xc0, 0xf2, 0x00, 0x0c, // mov.t ip, #0 __imp_<FUNCNAME>
+    0x00, 0xf0, 0x00, 0xb8, // b.w __tailMerge_<lib>
+};
+
+static const uint8_t TailMergeARM[] = {
     0x2d, 0xe9, 0x0f, 0x48, // push.w {r0, r1, r2, r3, r11, lr}
     0x0d, 0xf2, 0x10, 0x0b, // addw r11, sp, #16
     0x2d, 0xed, 0x10, 0x0b, // vpush {d0, d1, d2, d3, d4, d5, d6, d7}
@@ -245,6 +259,10 @@
 static const uint8_t ThunkARM64[] = {
     0x11, 0x00, 0x00, 0x90, // adrp x17, #0 __imp_<FUNCNAME>
     0x31, 0x02, 0x00, 0x91, // add x17, x17, #0 :lo12:__imp_<FUNCNAME>
+    0x00, 0x00, 0x00, 0x14, // b __tailMerge_<lib>
+};
+
+static const uint8_t TailMergeARM64[] = {
     0xfd, 0x7b, 0xb3, 0xa9, // stp x29, x30, [sp, #-208]!
     0xfd, 0x03, 0x00, 0x91, // mov x29, sp
     0xe0, 0x07, 0x01, 0xa9, // stp x0, x1, [sp, #16]
@@ -275,75 +293,119 @@
 // A chunk for the delay import thunk.
 class ThunkChunkX64 : public NonSectionChunk {
 public:
-  ThunkChunkX64(Defined *I, Chunk *D, Defined *H)
-      : Imp(I), Desc(D), Helper(H) {}
+  ThunkChunkX64(Defined *I, Chunk *TM) : Imp(I), TailMerge(TM) {}
 
   size_t getSize() const override { return sizeof(ThunkX64); }
 
   void writeTo(uint8_t *Buf) const override {
     memcpy(Buf, ThunkX64, sizeof(ThunkX64));
-    write32le(Buf + 36, Imp->getRVA() - RVA - 40);
-    write32le(Buf + 43, Desc->getRVA() - RVA - 47);
-    write32le(Buf + 48, Helper->getRVA() - RVA - 52);
+    write32le(Buf + 3, Imp->getRVA() - RVA - 7);
+    write32le(Buf + 8, TailMerge->getRVA() - RVA - 12);
   }
 
   Defined *Imp = nullptr;
+  Chunk *TailMerge = nullptr;
+};
+
+class TailMergeChunkX64 : public NonSectionChunk {
+public:
+  TailMergeChunkX64(Chunk *D, Defined *H) : Desc(D), Helper(H) {}
+
+  size_t getSize() const override { return sizeof(TailMergeX64); }
+
+  void writeTo(uint8_t *Buf) const override {
+    memcpy(Buf, TailMergeX64, sizeof(TailMergeX64));
+    write32le(Buf + 39, Desc->getRVA() - RVA - 43);
+    write32le(Buf + 44, Helper->getRVA() - RVA - 48);
+  }
+
   Chunk *Desc = nullptr;
   Defined *Helper = nullptr;
 };
 
 class ThunkChunkX86 : public NonSectionChunk {
 public:
-  ThunkChunkX86(Defined *I, Chunk *D, Defined *H)
-      : Imp(I), Desc(D), Helper(H) {}
+  ThunkChunkX86(Defined *I, Chunk *TM) : Imp(I), TailMerge(TM) {}
 
   size_t getSize() const override { return sizeof(ThunkX86); }
 
   void writeTo(uint8_t *Buf) const override {
     memcpy(Buf, ThunkX86, sizeof(ThunkX86));
-    write32le(Buf + 3, Imp->getRVA() + Config->ImageBase);
-    write32le(Buf + 8, Desc->getRVA() + Config->ImageBase);
-    write32le(Buf + 13, Helper->getRVA() - RVA - 17);
+    write32le(Buf + 1, Imp->getRVA() + Config->ImageBase);
+    write32le(Buf + 6, TailMerge->getRVA() - RVA - 10);
   }
 
   void getBaserels(std::vector<Baserel> *Res) override {
-    Res->emplace_back(RVA + 3);
-    Res->emplace_back(RVA + 8);
+    Res->emplace_back(RVA + 1);
   }
 
   Defined *Imp = nullptr;
+  Chunk *TailMerge = nullptr;
+};
+
+class TailMergeChunkX86 : public NonSectionChunk {
+public:
+  TailMergeChunkX86(Chunk *D, Defined *H) : Desc(D), Helper(H) {}
+
+  size_t getSize() const override { return sizeof(TailMergeX86); }
+
+  void writeTo(uint8_t *Buf) const override {
+    memcpy(Buf, TailMergeX86, sizeof(TailMergeX86));
+    write32le(Buf + 4, Desc->getRVA() + Config->ImageBase);
+    write32le(Buf + 9, Helper->getRVA() - RVA - 13);
+  }
+
+  void getBaserels(std::vector<Baserel> *Res) override {
+    Res->emplace_back(RVA + 4);
+  }
+
   Chunk *Desc = nullptr;
   Defined *Helper = nullptr;
 };
 
 class ThunkChunkARM : public NonSectionChunk {
 public:
-  ThunkChunkARM(Defined *I, Chunk *D, Defined *H)
-      : Imp(I), Desc(D), Helper(H) {}
+  ThunkChunkARM(Defined *I, Chunk *TM) : Imp(I), TailMerge(TM) {}
 
   size_t getSize() const override { return sizeof(ThunkARM); }
 
   void writeTo(uint8_t *Buf) const override {
     memcpy(Buf, ThunkARM, sizeof(ThunkARM));
     applyMOV32T(Buf + 0, Imp->getRVA() + Config->ImageBase);
-    applyMOV32T(Buf + 22, Desc->getRVA() + Config->ImageBase);
-    applyBranch24T(Buf + 30, Helper->getRVA() - RVA - 34);
+    applyBranch24T(Buf + 8, TailMerge->getRVA() - RVA - 12);
   }
 
   void getBaserels(std::vector<Baserel> *Res) override {
     Res->emplace_back(RVA + 0, IMAGE_REL_BASED_ARM_MOV32T);
-    Res->emplace_back(RVA + 22, IMAGE_REL_BASED_ARM_MOV32T);
   }
 
   Defined *Imp = nullptr;
+  Chunk *TailMerge = nullptr;
+};
+
+class TailMergeChunkARM : public NonSectionChunk {
+public:
+  TailMergeChunkARM(Chunk *D, Defined *H) : Desc(D), Helper(H) {}
+
+  size_t getSize() const override { return sizeof(TailMergeARM); }
+
+  void writeTo(uint8_t *Buf) const override {
+    memcpy(Buf, TailMergeARM, sizeof(TailMergeARM));
+    applyMOV32T(Buf + 14, Desc->getRVA() + Config->ImageBase);
+    applyBranch24T(Buf + 22, Helper->getRVA() - RVA - 26);
+  }
+
+  void getBaserels(std::vector<Baserel> *Res) override {
+    Res->emplace_back(RVA + 14, IMAGE_REL_BASED_ARM_MOV32T);
+  }
+
   Chunk *Desc = nullptr;
   Defined *Helper = nullptr;
 };
 
 class ThunkChunkARM64 : public NonSectionChunk {
 public:
-  ThunkChunkARM64(Defined *I, Chunk *D, Defined *H)
-      : Imp(I), Desc(D), Helper(H) {}
+  ThunkChunkARM64(Defined *I, Chunk *TM) : Imp(I), TailMerge(TM) {}
 
   size_t getSize() const override { return sizeof(ThunkARM64); }
 
@@ -351,12 +413,26 @@
     memcpy(Buf, ThunkARM64, sizeof(ThunkARM64));
     applyArm64Addr(Buf + 0, Imp->getRVA(), RVA + 0, 12);
     applyArm64Imm(Buf + 4, Imp->getRVA() & 0xfff, 0);
-    applyArm64Addr(Buf + 52, Desc->getRVA(), RVA + 52, 12);
-    applyArm64Imm(Buf + 56, Desc->getRVA() & 0xfff, 0);
-    applyArm64Branch26(Buf + 60, Helper->getRVA() - RVA - 60);
+    applyArm64Branch26(Buf + 8, TailMerge->getRVA() - RVA - 8);
   }
 
   Defined *Imp = nullptr;
+  Chunk *TailMerge = nullptr;
+};
+
+class TailMergeChunkARM64 : public NonSectionChunk {
+public:
+  TailMergeChunkARM64(Chunk *D, Defined *H) : Desc(D), Helper(H) {}
+
+  size_t getSize() const override { return sizeof(TailMergeARM64); }
+
+  void writeTo(uint8_t *Buf) const override {
+    memcpy(Buf, TailMergeARM64, sizeof(TailMergeARM64));
+    applyArm64Addr(Buf + 44, Desc->getRVA(), RVA + 44, 12);
+    applyArm64Imm(Buf + 48, Desc->getRVA() & 0xfff, 0);
+    applyArm64Branch26(Buf + 52, Helper->getRVA() - RVA - 52);
+  }
+
   Chunk *Desc = nullptr;
   Defined *Helper = nullptr;
 };
@@ -556,8 +632,9 @@
     auto *Dir = make<DelayDirectoryChunk>(DLLNames.back());
 
     size_t Base = Addresses.size();
+    Chunk *TM = newTailMergeChunk(Dir);
     for (DefinedImportData *S : Syms) {
-      Chunk *T = newThunkChunk(S, Dir);
+      Chunk *T = newThunkChunk(S, TM);
       auto *A = make<DelayAddressChunk>(T);
       Addresses.push_back(A);
       Thunks.push_back(T);
@@ -570,6 +647,7 @@
         HintNames.push_back(C);
       }
     }
+    Thunks.push_back(TM);
     // Terminate with null values.
     Addresses.push_back(make<NullChunk>(8));
     Names.push_back(make<NullChunk>(8));
@@ -590,16 +668,32 @@
   Dirs.push_back(make<NullChunk>(sizeof(delay_import_directory_table_entry)));
 }
 
-Chunk *DelayLoadContents::newThunkChunk(DefinedImportData *S, Chunk *Dir) {
+Chunk *DelayLoadContents::newTailMergeChunk(Chunk *Dir) {
+  switch (Config->Machine) {
+  case AMD64:
+    return make<TailMergeChunkX64>(Dir, Helper);
+  case I386:
+    return make<TailMergeChunkX86>(Dir, Helper);
+  case ARMNT:
+    return make<TailMergeChunkARM>(Dir, Helper);
+  case ARM64:
+    return make<TailMergeChunkARM64>(Dir, Helper);
+  default:
+    llvm_unreachable("unsupported machine type");
+  }
+}
+
+Chunk *DelayLoadContents::newThunkChunk(DefinedImportData *S,
+                                        Chunk *TailMerge) {
   switch (Config->Machine) {
   case AMD64:
-    return make<ThunkChunkX64>(S, Dir, Helper);
+    return make<ThunkChunkX64>(S, TailMerge);
   case I386:
-    return make<ThunkChunkX86>(S, Dir, Helper);
+    return make<ThunkChunkX86>(S, TailMerge);
   case ARMNT:
-    return make<ThunkChunkARM>(S, Dir, Helper);
+    return make<ThunkChunkARM>(S, TailMerge);
   case ARM64:
-    return make<ThunkChunkARM64>(S, Dir, Helper);
+    return make<ThunkChunkARM64>(S, TailMerge);
   default:
     llvm_unreachable("unsupported machine type");
   }
Index: test/COFF/arm64-delayimport.yaml
===================================================================
--- test/COFF/arm64-delayimport.yaml
+++ test/COFF/arm64-delayimport.yaml
@@ -7,31 +7,32 @@
 
 # DISASM: 140001014: 11 00 00 d0 adrp x17, #8192
 # DISASM: 140001018: 31 22 00 91 add x17, x17, #8
-# DISASM: 14000101c: fd 7b b3 a9 stp x29, x30, [sp, #-208]!
-# DISASM: 140001020: fd 03 00 91 mov x29, sp
-# DISASM: 140001024: e0 07 01 a9 stp x0, x1, [sp, #16]
-# DISASM: 140001028: e2 0f 02 a9 stp x2, x3, [sp, #32]
-# DISASM: 14000102c: e4 17 03 a9 stp x4, x5, [sp, #48]
-# DISASM: 140001030: e6 1f 04 a9 stp x6, x7, [sp, #64]
-# DISASM: 140001034: e0 87 02 ad stp q0, q1, [sp, #80]
-# DISASM: 140001038: e2 8f 03 ad stp q2, q3, [sp, #112]
-# DISASM: 14000103c: e4 97 04 ad stp q4, q5, [sp, #144]
-# DISASM: 140001040: e6 9f 05 ad stp q6, q7, [sp, #176]
-# DISASM: 140001044: e1 03 11 aa mov x1, x17
-# DISASM: 140001048: 00 00 00 b0 adrp x0, #4096
-# DISASM: 14000104c: 00 00 00 91 add x0, x0, #0
-# DISASM: 140001050: ec ff ff 97 bl #-80 <.text>
-# DISASM: 140001054: f0 03 00 aa mov x16, x0
-# DISASM: 140001058: e6 9f 45 ad ldp q6, q7, [sp, #176]
-# DISASM: 14000105c: e4 97 44 ad ldp q4, q5, [sp, #144]
-# DISASM: 140001060: e2 8f 43 ad ldp q2, q3, [sp, #112]
-# DISASM: 140001064: e0 87 42 ad ldp q0, q1, [sp, #80]
-# DISASM: 140001068: e6 1f 44 a9 ldp x6, x7, [sp, #64]
-# DISASM: 14000106c: e4 17 43 a9 ldp x4, x5, [sp, #48]
-# DISASM: 140001070: e2 0f 42 a9 ldp x2, x3, [sp, #32]
-# DISASM: 140001074: e0 07 41 a9 ldp x0, x1, [sp, #16]
-# DISASM: 140001078: fd 7b cd a8 ldp x29, x30, [sp], #208
-# DISASM: 14000107c: 00 02 1f d6 br x16
+# DISASM: 14000101c: 01 00 00 14 b #4 <.text+0x20>
+# DISASM: 140001020: fd 7b b3 a9 stp x29, x30, [sp, #-208]!
+# DISASM: 140001024: fd 03 00 91 mov x29, sp
+# DISASM: 140001028: e0 07 01 a9 stp x0, x1, [sp, #16]
+# DISASM: 14000102c: e2 0f 02 a9 stp x2, x3, [sp, #32]
+# DISASM: 140001030: e4 17 03 a9 stp x4, x5, [sp, #48]
+# DISASM: 140001034: e6 1f 04 a9 stp x6, x7, [sp, #64]
+# DISASM: 140001038: e0 87 02 ad stp q0, q1, [sp, #80]
+# DISASM: 14000103c: e2 8f 03 ad stp q2, q3, [sp, #112]
+# DISASM: 140001040: e4 97 04 ad stp q4, q5, [sp, #144]
+# DISASM: 140001044: e6 9f 05 ad stp q6, q7, [sp, #176]
+# DISASM: 140001048: e1 03 11 aa mov x1, x17
+# DISASM: 14000104c: 00 00 00 b0 adrp x0, #4096
+# DISASM: 140001050: 00 00 00 91 add x0, x0, #0
+# DISASM: 140001054: eb ff ff 97 bl #-84 <.text>
+# DISASM: 140001058: f0 03 00 aa mov x16, x0
+# DISASM: 14000105c: e6 9f 45 ad ldp q6, q7, [sp, #176]
+# DISASM: 140001060: e4 97 44 ad ldp q4, q5, [sp, #144]
+# DISASM: 140001064: e2 8f 43 ad ldp q2, q3, [sp, #112]
+# DISASM: 140001068: e0 87 42 ad ldp q0, q1, [sp, #80]
+# DISASM: 14000106c: e6 1f 44 a9 ldp x6, x7, [sp, #64]
+# DISASM: 140001070: e4 17 43 a9 ldp x4, x5, [sp, #48]
+# DISASM: 140001074: e2 0f 42 a9 ldp x2, x3, [sp, #32]
+# DISASM: 140001078: e0 07 41 a9 ldp x0, x1, [sp, #16]
+# DISASM: 14000107c: fd 7b cd a8 ldp x29, x30, [sp], #208
+# DISASM: 140001080: 00 02 1f d6 br x16
 
 # IMPORTS: Format: COFF-ARM64
 # IMPORTS: Arch: aarch64
Index: test/COFF/delayimports-armnt.yaml
===================================================================
--- test/COFF/delayimports-armnt.yaml
+++ test/COFF/delayimports-armnt.yaml
@@ -35,7 +35,7 @@
 # BASEREL-NEXT: }
 # BASEREL-NEXT: Entry {
 # BASEREL-NEXT: Type: ARM_MOV32(T)
-# BASEREL-NEXT: Address: 0x1022
+# BASEREL-NEXT: Address: 0x1026
 # BASEREL-NEXT: }
 # BASEREL-NEXT: Entry {
 # BASEREL-NEXT: Type: ABSOLUTE
@@ -53,13 +53,14 @@
 #
 # DISASM: 40100c: 43 f2 08 0c movw r12, #12296
 # DISASM-NEXT: c0 f2 40 0c movt r12, #64
+# DISASM-NEXT: 00 f0 00 b8 b.w #0
 # DISASM-NEXT: 2d e9 0f 48 push.w {r0, r1, r2, r3, r11, lr}
 # DISASM-NEXT: 0d f2 10 0b addw r11, sp, #16
 # DISASM-NEXT: 2d ed 10 0b vpush {d0, d1, d2, d3, d4, d5, d6, d7}
 # DISASM-NEXT: 61 46 mov r1, r12
 # DISASM-NEXT: 42 f2 00 00 movw r0, #8192
 # DISASM-NEXT: c0 f2 40 00 movt r0, #64
-# DISASM-NEXT: ff f7 e9 ff bl #-46
+# DISASM-NEXT: ff f7 e7 ff bl #-50
 # DISASM-NEXT: 84 46 mov r12, r0
 # DISASM-NEXT: bd ec 10 0b vpop {d0, d1, d2, d3, d4, d5, d6, d7}
 # DISASM-NEXT: bd e8 0f 48 pop.w {r0, r1, r2, r3, r11, lr}
Index: test/COFF/delayimports.test
===================================================================
--- test/COFF/delayimports.test
+++ test/COFF/delayimports.test
@@ -18,11 +18,11 @@
 IMPORT-NEXT: }
 IMPORT-NEXT: Import {
 IMPORT-NEXT: Symbol: (50)
-IMPORT-NEXT: Address: 0x1400010BD
+IMPORT-NEXT: Address: 0x140001072
 IMPORT-NEXT: }
 IMPORT-NEXT: Import {
 IMPORT-NEXT: Symbol: MessageBoxA (0)
-IMPORT-NEXT: Address: 0x140001114
+IMPORT-NEXT: Address: 0x14000107E
 IMPORT-NEXT: }
 IMPORT-NEXT: }
 
Index: test/COFF/delayimports32.test
===================================================================
--- test/COFF/delayimports32.test
+++ test/COFF/delayimports32.test
@@ -24,7 +24,7 @@
 IMPORT-NEXT: }
 IMPORT-NEXT: Import {
 IMPORT-NEXT: Symbol: MessageBoxA (0)
-IMPORT-NEXT: Address: 0x40103E
+IMPORT-NEXT: Address: 0x401033
 IMPORT-NEXT: }
 IMPORT-NEXT: }
 
@@ -47,19 +47,19 @@
 BASEREL-NEXT: }
 BASEREL-NEXT: Entry {
 BASEREL-NEXT: Type: HIGHLOW
-BASEREL-NEXT: Address: 0x102C
+BASEREL-NEXT: Address: 0x102A
 BASEREL-NEXT: }
 BASEREL-NEXT: Entry {
 BASEREL-NEXT: Type: HIGHLOW
-BASEREL-NEXT: Address: 0x1031
+BASEREL-NEXT: Address: 0x1034
 BASEREL-NEXT: }
 BASEREL-NEXT: Entry {
 BASEREL-NEXT: Type: HIGHLOW
 BASEREL-NEXT: Address: 0x1041
 BASEREL-NEXT: }
 BASEREL-NEXT: Entry {
-BASEREL-NEXT: Type: HIGHLOW
-BASEREL-NEXT: Address: 0x1046
+BASEREL-NEXT: Type: ABSOLUTE
+BASEREL-NEXT: Address: 0x1000
 BASEREL-NEXT: }
 BASEREL-NEXT: Entry {
 BASEREL-NEXT: Type: HIGHLOW
@@ -71,17 +71,15 @@
 BASEREL-NEXT: }
 BASEREL-NEXT: ]
 
-DISASM: 102b: 68 20 30 40 00 pushl $4206624
-DISASM-NEXT: 1030: 68 00 20 40 00 pushl $4202496
-DISASM-NEXT: 1035: e8 c6 ff ff ff calll -58 <.text>
-DISASM-NEXT: 103a: 5a popl %edx
-DISASM-NEXT: 103b: 59 popl %ecx
-DISASM-NEXT: 103c: ff e0 jmpl *%eax
-DISASM-NEXT: 103e: 51 pushl %ecx
-DISASM-NEXT: 103f: 52 pushl %edx
-DISASM-NEXT: 1040: 68 24 30 40 00 pushl $4206628
-DISASM-NEXT: 1045: 68 00 20 40 00 pushl $4202496
-DISASM-NEXT: 104a: e8 b1 ff ff ff calll -79 <.text>
-DISASM-NEXT: 104f: 5a popl %edx
-DISASM-NEXT: 1050: 59 popl %ecx
-DISASM-NEXT: 1051: ff e0 jmpl *%eax
+DISASM: 1029: b8 20 30 40 00 movl $4206624, %eax
+DISASM-NEXT: 102e: e9 0a 00 00 00 jmp 10 <.text+0x3d>
+DISASM-NEXT: 1033: b8 24 30 40 00 movl $4206628, %eax
+DISASM-NEXT: 1038: e9 00 00 00 00 jmp 0 <.text+0x3d>
+DISASM-NEXT: 103d: 51 pushl %ecx
+DISASM-NEXT: 103e: 52 pushl %edx
+DISASM-NEXT: 103f: 50 pushl %eax
+DISASM-NEXT: 1040: 68 00 20 40 00 pushl $4202496
+DISASM-NEXT: 1045: e8 b6 ff ff ff calll -74 <.text>
+DISASM-NEXT: 104a: 5a popl %edx
+DISASM-NEXT: 104b: 59 popl %ecx
+DISASM-NEXT: 104c: ff e0 jmpl *%eax
Index: test/COFF/delayimporttables.yaml
===================================================================
--- test/COFF/delayimporttables.yaml
+++ test/COFF/delayimporttables.yaml
@@ -23,7 +23,7 @@
 # CHECK-NEXT: }
 # CHECK-NEXT: Import {
 # CHECK-NEXT: Symbol: two (0)
-# CHECK-NEXT: Address: 0x1400010A4
+# CHECK-NEXT: Address: 0x140001059
 # CHECK-NEXT: }
 # CHECK-NEXT: }
 # CHECK-NEXT: DelayImport {
@@ -36,11 +36,11 @@
 # CHECK-NEXT: UnloadDelayImportTable: 0x0
 # CHECK-NEXT: Import {
 # CHECK-NEXT: Symbol: left (0)
-# CHECK-NEXT: Address: 0x1400010FB
+# CHECK-NEXT: Address: 0x1400010B8
 # CHECK-NEXT: }
 # CHECK-NEXT: Import {
 # CHECK-NEXT: Symbol: right (0)
-# CHECK-NEXT: Address: 0x140001152
+# CHECK-NEXT: Address: 0x1400010C4
 # CHECK-NEXT: }
 # CHECK-NEXT: }
 