Index: llvm/include/llvm/MC/MCWinEH.h
===================================================================
--- llvm/include/llvm/MC/MCWinEH.h
+++ llvm/include/llvm/MC/MCWinEH.h
@@ -26,6 +26,14 @@
 
   Instruction(unsigned Op, MCSymbol *L, unsigned Reg, unsigned Off)
     : Label(L), Offset(Off), Register(Reg), Operation(Op) {}
+
+  // Two instructions are equal when Offset, Register and Operation all match.
+  // Label is not compared, so opcodes recorded at different labels can match.
+  bool operator==(const Instruction &I) const {
+    return Offset == I.Offset && Register == I.Register &&
+           Operation == I.Operation;
+  }
+  bool operator!=(const Instruction &I) const { return !(*this == I); }
 };
 
 struct FrameInfo {
Index: llvm/lib/MC/MCWin64EH.cpp
===================================================================
--- llvm/lib/MC/MCWin64EH.cpp
+++ llvm/lib/MC/MCWin64EH.cpp
@@ -264,8 +264,7 @@
   return value;
 }
 
-static uint32_t
-ARM64CountOfUnwindCodes(const std::vector<WinEH::Instruction> &Insns) {
+static uint32_t ARM64CountOfUnwindCodes(ArrayRef<WinEH::Instruction> Insns) {
   uint32_t Count = 0;
   for (const auto &I : Insns) {
     switch (static_cast<Win64EH::UnwindOpcodes>(I.Operation)) {
@@ -553,18 +552,23 @@
     // Convert 2-byte opcodes into equivalent 1-byte ones.
     if (Inst.Operation == Win64EH::UOP_SaveRegP && Inst.Register == 29) {
       Inst.Operation = Win64EH::UOP_SaveFPLR;
+      Inst.Register = -1;
     } else if (Inst.Operation == Win64EH::UOP_SaveRegPX &&
                Inst.Register == 29) {
       Inst.Operation = Win64EH::UOP_SaveFPLRX;
+      Inst.Register = -1;
     } else if (Inst.Operation == Win64EH::UOP_SaveRegPX &&
                Inst.Register == 19) {
       Inst.Operation = Win64EH::UOP_SaveR19R20X;
+      Inst.Register = -1;
     } else if (Inst.Operation == Win64EH::UOP_AddFP && Inst.Offset == 0) {
       Inst.Operation = Win64EH::UOP_SetFP;
     } else if (Inst.Operation == Win64EH::UOP_SaveRegP &&
                Inst.Register == PrevRegister + 2 &&
                Inst.Offset == PrevOffset + 16) {
       Inst.Operation = Win64EH::UOP_SaveNext;
+      Inst.Register = -1;
+      Inst.Offset = 0;
       // Intentionally not creating UOP_SaveNext for float register pairs,
       // as current versions of Windows (up to at least 20.04) is buggy
       // regarding SaveNext for float pairs.
@@ -601,6 +605,48 @@
   }
 }
 
+// If the single epilog can reuse the prolog's unwind codes, clears
+// info->EpilogMap and returns the packed epilog offset; otherwise returns -1.
+static int checkPackedEpilog(MCStreamer &streamer, WinEH::FrameInfo *info,
+                             int PrologCodeBytes) {
+  // Can only pack if there's one single epilog
+  if (info->EpilogMap.size() != 1)
+    return -1;
+
+  const std::vector<WinEH::Instruction> &Epilog =
+      info->EpilogMap.begin()->second;
+
+  // Can pack if the epilog is a subset of the prolog but not vice versa
+  if (Epilog.size() > info->Instructions.size())
+    return -1;
+
+  // Check that the epilog actually is a perfect match for the end (backwards)
+  // of the prolog.
+  for (int I = Epilog.size() - 1; I >= 0; I--) {
+    if (info->Instructions[I] != Epilog[Epilog.size() - 1 - I])
+      return -1;
+  }
+
+  // Check that the epilog actually is at the very end of the function,
+  // otherwise it can't be packed.
+  uint32_t DistanceFromEnd = (uint32_t)GetAbsDifference(
+      streamer, info->FuncletOrFuncEnd, info->EpilogMap.begin()->first);
+  if (DistanceFromEnd / 4 != Epilog.size())
+    return -1;
+
+  // Slice with drop_front(): Instructions[size()] is invalid on a full match.
+  ArrayRef<WinEH::Instruction> Prolog(info->Instructions);
+  int Offset = ARM64CountOfUnwindCodes(Prolog.drop_front(Epilog.size()));
+
+  // The offset and prolog size must fit in the first header word; it's unclear
+  // whether the extension word's epilog count can hold a packed epilog offset.
+  if (Offset > 31 || PrologCodeBytes > 124)
+    return -1;
+
+  info->EpilogMap.clear();
+  return Offset;
+}
+
 // Populate the .xdata section. The format of .xdata on ARM64 is documented at
 // https://docs.microsoft.com/en-us/cpp/build/arm64-exception-handling
 static void ARM64EmitUnwindInfo(MCStreamer &streamer, WinEH::FrameInfo *info) {
@@ -679,6 +725,8 @@
   uint32_t PrologCodeBytes = ARM64CountOfUnwindCodes(info->Instructions);
   uint32_t TotalCodeBytes = PrologCodeBytes;
 
+  int PackedEpilogOffset = checkPackedEpilog(streamer, info, PrologCodeBytes);
+
   // Process epilogs.
   MapVector<MCSymbol *, uint32_t> EpilogInfo;
   // Epilogs processed so far.
@@ -711,15 +759,17 @@ uint32_t CodeWordsMod = TotalCodeBytes % 4; if (CodeWordsMod) CodeWords++; - uint32_t EpilogCount = info->EpilogMap.size(); + uint32_t EpilogCount = + PackedEpilogOffset >= 0 ? PackedEpilogOffset : info->EpilogMap.size(); bool ExtensionWord = EpilogCount > 31 || TotalCodeBytes > 124; if (!ExtensionWord) { row1 |= (EpilogCount & 0x1F) << 22; row1 |= (CodeWords & 0x1F) << 27; } - // E is always 0 right now, TODO: packed epilog setup if (info->HandlesExceptions) // X row1 |= 1 << 20; + if (PackedEpilogOffset >= 0) // E + row1 |= 1 << 21; row1 |= FuncLength & 0x3FFFF; streamer.emitInt32(row1); Index: llvm/test/CodeGen/AArch64/wineh3.mir =================================================================== --- llvm/test/CodeGen/AArch64/wineh3.mir +++ llvm/test/CodeGen/AArch64/wineh3.mir @@ -8,9 +8,9 @@ # CHECK-NEXT: FunctionLength: 124 # CHECK-NEXT: Version: 0 # CHECK-NEXT: ExceptionData: No -# CHECK-NEXT: EpiloguePacked: No -# CHECK-NEXT: EpilogueScopes: 1 -# CHECK-NEXT: ByteCodeLength: 32 +# CHECK-NEXT: EpiloguePacked: Yes +# CHECK-NEXT: EpilogueOffset: 0 +# CHECK-NEXT: ByteCodeLength: 16 # CHECK-NEXT: Prologue [ # CHECK-NEXT: 0xc80c ; stp x19, x20, [sp, #96] # CHECK-NEXT: 0xc88a ; stp x21, x22, [sp, #80] @@ -21,22 +21,6 @@ # CHECK-NEXT: 0xda8d ; stp d10, d11, [sp, #-112]! # CHECK-NEXT: 0xe4 ; end # CHECK-NEXT: ] -# CHECK-NEXT: EpilogueScopes [ -# CHECK-NEXT: EpilogueScope { -# CHECK-NEXT: StartOffset: 23 -# CHECK-NEXT: EpilogueStartIndex: 15 -# CHECK-NEXT: Opcodes [ -# CHECK-NEXT: 0xc80c ; ldp x19, x20, [sp, #96] -# CHECK-NEXT: 0xc88a ; ldp x21, x22, [sp, #80] -# CHECK-NEXT: 0xc908 ; ldp x23, x24, [sp, #64] -# CHECK-NEXT: 0xc986 ; ldp x25, x26, [sp, #48] -# CHECK-NEXT: 0xca04 ; ldp x27, x28, [sp, #32] -# CHECK-NEXT: 0xd802 ; ldp d8, d9, [sp, #16] -# CHECK-NEXT: 0xda8d ; ldp d10, d11, [sp], #112 -# CHECK-NEXT: 0xe4 ; end -# CHECK-NEXT: ] -# CHECK-NEXT: } -# CHECK-NEXT: ] # CHECK-NEXT: } ... 
--- Index: llvm/test/CodeGen/AArch64/wineh6.mir =================================================================== --- llvm/test/CodeGen/AArch64/wineh6.mir +++ llvm/test/CodeGen/AArch64/wineh6.mir @@ -6,25 +6,19 @@ # CHECK-NEXT: FunctionLength: 92 # CHECK-NEXT: Version: 0 # CHECK-NEXT: ExceptionData: No -# CHECK-NEXT: EpiloguePacked: No -# CHECK-NEXT: EpilogueScopes: 1 -# CHECK-NEXT: ByteCodeLength: 8 +# CHECK-NEXT: EpiloguePacked: Yes +# CHECK-NEXT: EpilogueOffset: 1 +# CHECK-NEXT: ByteCodeLength: 4 # CHECK-NEXT: Prologue [ # CHECK-NEXT: 0x02 ; sub sp, #32 # CHECK-NEXT: 0xe1 ; mov fp, sp # CHECK-NEXT: 0x81 ; stp x29, x30, [sp, #-16]! # CHECK-NEXT: 0xe4 ; end # CHECK-NEXT: ] -# CHECK-NEXT: EpilogueScopes [ -# CHECK-NEXT: EpilogueScope { -# CHECK-NEXT: StartOffset: 20 -# CHECK-NEXT: EpilogueStartIndex: 4 -# CHECK-NEXT: Opcodes [ -# CHECK-NEXT: 0xe1 ; mov sp, fp -# CHECK-NEXT: 0x81 ; ldp x29, x30, [sp], #16 -# CHECK-NEXT: 0xe4 ; end -# CHECK-NEXT: ] -# CHECK-NEXT: } +# CHECK-NEXT: Epilogue [ +# CHECK-NEXT: 0xe1 ; mov sp, fp +# CHECK-NEXT: 0x81 ; ldp x29, x30, [sp], #16 +# CHECK-NEXT: 0xe4 ; end # CHECK-NEXT: ] # CHECK-NEXT: } ... Index: llvm/test/CodeGen/AArch64/wineh7.mir =================================================================== --- llvm/test/CodeGen/AArch64/wineh7.mir +++ llvm/test/CodeGen/AArch64/wineh7.mir @@ -6,9 +6,9 @@ # CHECK-NEXT: FunctionLength: 72 # CHECK-NEXT: Version: 0 # CHECK-NEXT: ExceptionData: No -# CHECK-NEXT: EpiloguePacked: No -# CHECK-NEXT: EpilogueScopes: 1 -# CHECK-NEXT: ByteCodeLength: 16 +# CHECK-NEXT: EpiloguePacked: Yes +# CHECK-NEXT: EpilogueOffset: 0 +# CHECK-NEXT: ByteCodeLength: 8 # CHECK-NEXT: Prologue [ # CHECK-NEXT: 0xe204 ; add fp, sp, #32 # CHECK-NEXT: 0x44 ; stp x29, x30, [sp, #32] @@ -16,19 +16,6 @@ # CHECK-NEXT: 0xcc85 ; stp x21, x22, [sp, #-48]! 
# CHECK-NEXT: 0xe4 ; end # CHECK-NEXT: ] -# CHECK-NEXT: EpilogueScopes [ -# CHECK-NEXT: EpilogueScope { -# CHECK-NEXT: StartOffset: 13 -# CHECK-NEXT: EpilogueStartIndex: 8 -# CHECK-NEXT: Opcodes [ -# CHECK-NEXT: 0xe204 ; sub sp, fp, #32 -# CHECK-NEXT: 0x44 ; ldp x29, x30, [sp, #32] -# CHECK-NEXT: 0xc802 ; ldp x19, x20, [sp, #16] -# CHECK-NEXT: 0xcc85 ; ldp x21, x22, [sp], #48 -# CHECK-NEXT: 0xe4 ; end -# CHECK-NEXT: ] -# CHECK-NEXT: } -# CHECK-NEXT: ] # CHECK-NEXT: } # CHECK-NEXT: } Index: llvm/test/MC/AArch64/seh-packed-epilog.s =================================================================== --- /dev/null +++ llvm/test/MC/AArch64/seh-packed-epilog.s @@ -0,0 +1,186 @@ +// This test checks that the epilogue is packed where possible. + +// RUN: llvm-mc -triple aarch64-pc-win32 -filetype=obj %s | llvm-readobj -u - | FileCheck %s + +// CHECK: UnwindInformation [ +// CHECK-NEXT: RuntimeFunction { +// CHECK-NEXT: Function: func +// CHECK-NEXT: ExceptionRecord: .xdata +// CHECK-NEXT: ExceptionData { +// CHECK-NEXT: FunctionLength: +// CHECK-NEXT: Version: +// CHECK-NEXT: ExceptionData: +// CHECK-NEXT: EpiloguePacked: Yes +// CHECK-NEXT: EpilogueOffset: 2 +// CHECK-NEXT: ByteCodeLength: +// CHECK-NEXT: Prologue [ +// CHECK-NEXT: 0xdc04 ; str d8, [sp, #32] +// CHECK-NEXT: 0xe1 ; mov fp, sp +// CHECK-NEXT: 0x42 ; stp x29, x30, [sp, #16] +// CHECK-NEXT: 0x85 ; stp x29, x30, [sp, #-48]! +// CHECK-NEXT: 0xe6 ; save next +// CHECK-NEXT: 0x24 ; stp x19, x20, [sp, #-32]! 
+// CHECK-NEXT: 0xc842 ; stp x20, x21, [sp, #16] +// CHECK-NEXT: 0x03 ; sub sp, #48 +// CHECK-NEXT: 0xe4 ; end +// CHECK-NEXT: ] +// CHECK-NEXT: Epilogue [ +// CHECK-NEXT: 0xe1 ; mov sp, fp +// CHECK-NEXT: 0x42 ; ldp x29, x30, [sp, #16] +// CHECK-NEXT: 0x85 ; ldp x29, x30, [sp], #48 +// CHECK-NEXT: 0xe6 ; restore next +// CHECK-NEXT: 0x24 ; ldp x19, x20, [sp], #32 +// CHECK-NEXT: 0xc842 ; ldp x20, x21, [sp, #16] +// CHECK-NEXT: 0x03 ; add sp, #48 +// CHECK-NEXT: 0xe4 ; end +// CHECK-NEXT: ] +// CHECK-NEXT: } +// CHECK-NEXT: } +// CHECK: RuntimeFunction { +// CHECK-NEXT: Function: packed2 +// CHECK-NEXT: ExceptionRecord: +// CHECK-NEXT: ExceptionData { +// CHECK: ExceptionData: +// CHECK-NEXT: EpiloguePacked: Yes +// CHECK: RuntimeFunction { +// CHECK-NEXT: Function: nonpacked1 +// CHECK-NEXT: ExceptionRecord: +// CHECK-NEXT: ExceptionData { +// CHECK: ExceptionData: +// CHECK-NEXT: EpiloguePacked: No +// CHECK: RuntimeFunction { +// CHECK-NEXT: Function: nonpacked2 +// CHECK-NEXT: ExceptionRecord: +// CHECK-NEXT: ExceptionData { +// CHECK: ExceptionData: +// CHECK-NEXT: EpiloguePacked: No +// CHECK: RuntimeFunction { +// CHECK-NEXT: Function: nonpacked3 +// CHECK-NEXT: ExceptionRecord: +// CHECK-NEXT: ExceptionData { +// CHECK: ExceptionData: +// CHECK-NEXT: EpiloguePacked: No + + .text + .globl func + .seh_proc func +func: + sub sp, sp, #48 + .seh_stackalloc 48 + // Check that canonical opcode forms (r19r20_x, fplr, fplr_x, save_next, + // set_fp) are treated as a match even if one (in prologue or epilogue) + // was simplified from the more generic opcodes. + stp x20, x21, [sp, #16] + .seh_save_regp x20, 16 + stp x19, x20, [sp, #-32]! + .seh_save_r19r20_x 32 + stp x21, x22, [sp, #16] + .seh_save_regp x21, 16 + stp x29, x30, [sp, #-48]! 
+ .seh_save_regp_x x29, 48 + stp x29, x30, [sp, #16] + .seh_save_regp x29, 16 + add x29, sp, #0 + .seh_add_fp 0 + str d8, [sp, #32] + .seh_save_freg d8, 32 + .seh_endprologue + + nop + + .seh_startepilogue + mov sp, x29 + .seh_set_fp + ldp x29, x30, [sp, #16] + .seh_save_fplr 16 + ldp x29, x30, [sp, #-48]! + .seh_save_fplr_x 48 + ldp x21, x22, [sp, #16] + .seh_save_next + ldp x19, x20, [sp], #32 + .seh_save_regp_x x19, 32 + ldp x20, x21, [sp, #16] + .seh_save_regp x20, 16 + add sp, sp, #48 + .seh_stackalloc 48 + .seh_endepilogue + ret + .seh_endproc + + + // Test a perfectly matching epilog with no offset. + .seh_proc packed2 +packed2: + sub sp, sp, #48 + .seh_stackalloc 48 + stp x29, lr, [sp, #-32]! + .seh_save_fplr_x 32 + .seh_endprologue + nop + .seh_startepilogue + ldp x29, lr, [sp], #32 + .seh_save_fplr_x 32 + add sp, sp, #48 + .seh_stackalloc 48 + .seh_endepilogue + ret + .seh_endproc + + + .seh_proc nonpacked1 +nonpacked1: + sub sp, sp, #48 + .seh_stackalloc 48 + .seh_endprologue + + nop + .seh_startepilogue + add sp, sp, #48 + .seh_stackalloc 48 + .seh_endepilogue + // This epilogue isn't packed with the prologue, as it doesn't align with + // the end of the function (one extra nop before the ret). + nop + ret + .seh_endproc + + + .seh_proc nonpacked2 +nonpacked2: + sub sp, sp, #48 + .seh_stackalloc 48 + sub sp, sp, #32 + .seh_stackalloc 32 + .seh_endprologue + + nop + .seh_startepilogue + // Not packed; the epilogue mismatches at the second opcode. + add sp, sp, #16 + .seh_stackalloc 16 + add sp, sp, #48 + .seh_stackalloc 48 + .seh_endepilogue + ret + .seh_endproc + + .seh_proc nonpacked3 +nonpacked3: + sub sp, sp, #48 + .seh_stackalloc 48 + sub sp, sp, #32 + .seh_stackalloc 32 + .seh_endprologue + + nop + .seh_startepilogue + // Not packed; the epilogue is longer than the prologue. 
+ mov sp, x29 + .seh_set_fp + add sp, sp, #32 + .seh_stackalloc 32 + add sp, sp, #48 + .seh_stackalloc 48 + .seh_endepilogue + ret + .seh_endproc Index: llvm/test/MC/AArch64/seh.s =================================================================== --- llvm/test/MC/AArch64/seh.s +++ llvm/test/MC/AArch64/seh.s @@ -20,7 +20,7 @@ // CHECK-NEXT: } // CHECK: Section { // CHECK: Name: .xdata -// CHECK: RawDataSize: 56 +// CHECK: RawDataSize: 52 // CHECK: RelocationCount: 1 // CHECK: Characteristics [ // CHECK-NEXT: ALIGN_4BYTES @@ -41,7 +41,7 @@ // CHECK-NEXT: Relocations [ // CHECK-NEXT: Section (4) .xdata { -// CHECK-NEXT: 0x2C IMAGE_REL_ARM64_ADDR32NB __C_specific_handler +// CHECK-NEXT: 0x28 IMAGE_REL_ARM64_ADDR32NB __C_specific_handler // CHECK-NEXT: } // CHECK-NEXT: Section (5) .pdata { // CHECK-NEXT: 0x0 IMAGE_REL_ARM64_ADDR32NB func @@ -80,15 +80,9 @@ // CHECK-NEXT: 0x01 ; sub sp, #16 // CHECK-NEXT: 0xe4 ; end // CHECK-NEXT: ] -// CHECK-NEXT: EpilogueScopes [ -// CHECK-NEXT: EpilogueScope { -// CHECK-NEXT: StartOffset: 23 -// CHECK-NEXT: EpilogueStartIndex: 33 -// CHECK-NEXT: Opcodes [ -// CHECK-NEXT: 0x01 ; add sp, #16 -// CHECK-NEXT: 0xe4 ; end -// CHECK-NEXT: ] -// CHECK-NEXT: } +// CHECK-NEXT: Epilogue [ +// CHECK-NEXT: 0x01 ; add sp, #16 +// CHECK-NEXT: 0xe4 ; end // CHECK-NEXT: ] // CHECK-NEXT: ExceptionHandler [ // CHECK-NEXT: Routine: __C_specific_handler (0x0)