diff --git a/llvm/include/llvm/CodeGen/TargetRegisterInfo.h b/llvm/include/llvm/CodeGen/TargetRegisterInfo.h --- a/llvm/include/llvm/CodeGen/TargetRegisterInfo.h +++ b/llvm/include/llvm/CodeGen/TargetRegisterInfo.h @@ -449,6 +449,13 @@ return nullptr; } + /// Return a register mask for the registers preserved by the unwinder, + /// or nullptr if no custom mask is needed. + virtual const uint32_t * + getCustomEHPadPreservedMask(const MachineFunction &MF) const { + return nullptr; + } + /// Return a register mask that clobbers everything. virtual const uint32_t *getNoPreservedMask() const { llvm_unreachable("target does not provide no preserved mask"); diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp --- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp +++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp @@ -2108,6 +2108,12 @@ MIRBuilder.buildInstr(TargetOpcode::EH_LABEL) .addSym(MF->addLandingPad(&MBB)); + // If the unwinder does not preserve all registers, ensure that the + // function marks the clobbered registers as used. + const TargetRegisterInfo &TRI = *MF->getSubtarget().getRegisterInfo(); + if (auto *RegMask = TRI.getCustomEHPadPreservedMask(*MF)) + MF->getRegInfo().addPhysRegsUsedFromRegMask(RegMask); + LLT Ty = getLLTForType(*LP.getType(), *DL); Register Undef = MRI->createGenericVirtualRegister(Ty); MIRBuilder.buildUndef(Undef); diff --git a/llvm/lib/CodeGen/LiveIntervals.cpp b/llvm/lib/CodeGen/LiveIntervals.cpp --- a/llvm/lib/CodeGen/LiveIntervals.cpp +++ b/llvm/lib/CodeGen/LiveIntervals.cpp @@ -225,6 +225,15 @@ RegMaskBits.push_back(Mask); } + // Unwinders may clobber additional registers. + // FIXME: This functionality can possibly be merged into + // MachineBasicBlock::getBeginClobberMask(). + if (MBB.isEHPad()) + if (auto *Mask = TRI->getCustomEHPadPreservedMask(*MBB.getParent())) { + RegMaskSlots.push_back(Indexes->getMBBStartIdx(&MBB)); + RegMaskBits.push_back(Mask); + } + for (const MachineInstr &MI : MBB) { for (const MachineOperand &MO : MI.operands()) { if (!MO.isRegMask()) diff --git a/llvm/lib/CodeGen/MIRParser/MIRParser.cpp b/llvm/lib/CodeGen/MIRParser/MIRParser.cpp --- a/llvm/lib/CodeGen/MIRParser/MIRParser.cpp +++ b/llvm/lib/CodeGen/MIRParser/MIRParser.cpp @@ -639,6 +639,12 @@ // Compute MachineRegisterInfo::UsedPhysRegMask for (const MachineBasicBlock &MBB : MF) { + // Make sure MRI knows about registers clobbered by unwinder. + const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); + if (MBB.isEHPad()) + if (auto *RegMask = TRI->getCustomEHPadPreservedMask(MF)) + MRI.addPhysRegsUsedFromRegMask(RegMask); + for (const MachineInstr &MI : MBB) { for (const MachineOperand &MO : MI.operands()) { if (!MO.isRegMask()) diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -1251,6 +1251,12 @@ BuildMI(*MBB, FuncInfo->InsertPt, SDB->getCurDebugLoc(), II) .addSym(Label); + // If the unwinder does not preserve all registers, ensure that the + // function marks the clobbered registers as used. + const TargetRegisterInfo &TRI = *MF->getSubtarget().getRegisterInfo(); + if (auto *RegMask = TRI.getCustomEHPadPreservedMask(*MF)) + MF->getRegInfo().addPhysRegsUsedFromRegMask(RegMask); + if (Pers == EHPersonality::Wasm_CXX) { if (const auto *CPI = dyn_cast(LLVMBB->getFirstNonPHI())) mapWasmLandingPadIndex(MBB, CPI); diff --git a/llvm/lib/Target/AArch64/AArch64RegisterInfo.h b/llvm/lib/Target/AArch64/AArch64RegisterInfo.h --- a/llvm/lib/Target/AArch64/AArch64RegisterInfo.h +++ b/llvm/lib/Target/AArch64/AArch64RegisterInfo.h @@ -72,6 +72,10 @@ // Funclets on ARM64 Windows don't preserve any registers. const uint32_t *getNoPreservedMask() const override; + // Unwinders may not preserve all Neon and SVE registers. + const uint32_t * + getCustomEHPadPreservedMask(const MachineFunction &MF) const override; + /// getThisReturnPreservedMask - Returns a call preserved mask specific to the /// case that 'returned' is on an i64 first argument if the calling convention /// is one that can (partially) model this attribute with a preserved mask diff --git a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp --- a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp @@ -240,6 +240,14 @@ return SCS ? CSR_AArch64_AAPCS_SCS_RegMask : CSR_AArch64_AAPCS_RegMask; } +const uint32_t *AArch64RegisterInfo::getCustomEHPadPreservedMask( + const MachineFunction &MF) const { + if (MF.getSubtarget().isTargetLinux()) + return CSR_AArch64_AAPCS_RegMask; + + return nullptr; +} + const uint32_t *AArch64RegisterInfo::getTLSCallPreservedMask() const { if (TT.isOSDarwin()) return CSR_Darwin_AArch64_TLS_RegMask; diff --git a/llvm/test/CodeGen/AArch64/unwind-preserved-from-mir.mir b/llvm/test/CodeGen/AArch64/unwind-preserved-from-mir.mir new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/unwind-preserved-from-mir.mir @@ -0,0 +1,143 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -run-pass=regallocfast,prologepilog -simplify-mir -o - %s | FileCheck %s + +# This test checks that the MIRParser correctly clobbers the registers +# that are not preserved by the unwinder (when the MIR contains an EH pad). +--- | + ; ModuleID = '' + source_filename = "" + target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" + target triple = "aarch64-unknown-linux-gnu" + + define aarch64_vector_pcs <4 x i32> @invoke_callee_may_throw_neon(<4 x i32> %v) personality i8 0 { + %result = invoke aarch64_vector_pcs <4 x i32> @may_throw_neon(<4 x i32> %v) + to label %.Lcontinue unwind label %.Lunwind + + .Lcontinue: ; preds = %0 + ret <4 x i32> %result + + .Lunwind: ; preds = %0 + %lp = landingpad { i8*, i32 } + cleanup + ret <4 x i32> %v + } + + declare aarch64_vector_pcs <4 x i32> @may_throw_neon(<4 x i32>) + +... +--- +name: invoke_callee_may_throw_neon +alignment: 4 +legalized: true +regBankSelected: true +selected: true +tracksRegLiveness: true +registers: + - { id: 0, class: fpr128 } + - { id: 1, class: fpr128 } + - { id: 2, class: _ } + - { id: 3, class: _ } + - { id: 4, class: _ } + - { id: 5, class: _ } +liveins: + - { reg: '$q0' } +frameInfo: + maxAlignment: 1 + adjustsStack: true + hasCalls: true + maxCallFrameSize: 0 +machineFunctionInfo: {} +body: | + ; CHECK-LABEL: name: invoke_callee_may_throw_neon + ; CHECK: bb.0 (%ir-block.0): + ; CHECK: successors: %bb.1, %bb.2 + ; CHECK: liveins: $q0, $q22, $q23, $q20, $q21, $q18, $q19, $q16, $q17, $q14, $q15, $q12, $q13, $q10, $q11, $q8, $q9, $lr, $fp + ; CHECK: $sp = frame-setup SUBXri $sp, 304, 0 + ; CHECK: frame-setup STPQi killed $q23, killed $q22, $sp, 2 :: (store 16 into %stack.19), (store 16 into %stack.18) + ; CHECK: frame-setup STPQi killed $q21, killed $q20, $sp, 4 :: (store 16 into %stack.17), (store 16 into %stack.16) + ; CHECK: frame-setup STPQi killed $q19, killed $q18, $sp, 6 :: (store 16 into %stack.15), (store 16 into %stack.14) + ; CHECK: frame-setup STPQi killed $q17, killed $q16, $sp, 8 :: (store 16 into %stack.13), (store 16 into %stack.12) + ; CHECK: frame-setup STPQi killed $q15, killed $q14, $sp, 10 :: (store 16 into %stack.11), (store 16 into %stack.10) + ; CHECK: frame-setup STPQi killed $q13, killed $q12, $sp, 12 :: (store 16 into %stack.9), (store 16 into %stack.8) + ; CHECK: frame-setup STPQi killed $q11, killed $q10, $sp, 14 :: (store 16 into %stack.7), (store 16 into %stack.6) + ; CHECK: frame-setup STPQi killed $q9, killed $q8, $sp, 16 :: (store 16 into %stack.5), (store 16 into %stack.4) + ; CHECK: frame-setup STPXi killed $fp, killed $lr, $sp, 36 :: (store 8 into %stack.3), (store 8 into %stack.2) + ; CHECK: frame-setup CFI_INSTRUCTION def_cfa_offset 304 + ; CHECK: frame-setup CFI_INSTRUCTION offset $w30, -8 + ; CHECK: frame-setup CFI_INSTRUCTION offset $w29, -16 + ; CHECK: frame-setup CFI_INSTRUCTION offset $b8, -32 + ; CHECK: frame-setup CFI_INSTRUCTION offset $b9, -48 + ; CHECK: frame-setup CFI_INSTRUCTION offset $b10, -64 + ; CHECK: frame-setup CFI_INSTRUCTION offset $b11, -80 + ; CHECK: frame-setup CFI_INSTRUCTION offset $b12, -96 + ; CHECK: frame-setup CFI_INSTRUCTION offset $b13, -112 + ; CHECK: frame-setup CFI_INSTRUCTION offset $b14, -128 + ; CHECK: frame-setup CFI_INSTRUCTION offset $b15, -144 + ; CHECK: frame-setup CFI_INSTRUCTION offset $b16, -160 + ; CHECK: frame-setup CFI_INSTRUCTION offset $b17, -176 + ; CHECK: frame-setup CFI_INSTRUCTION offset $b18, -192 + ; CHECK: frame-setup CFI_INSTRUCTION offset $b19, -208 + ; CHECK: frame-setup CFI_INSTRUCTION offset $b20, -224 + ; CHECK: frame-setup CFI_INSTRUCTION offset $b21, -240 + ; CHECK: frame-setup CFI_INSTRUCTION offset $b22, -256 + ; CHECK: frame-setup CFI_INSTRUCTION offset $b23, -272 + ; CHECK: EH_LABEL + ; CHECK: STRQui $q0, $sp, 1 :: (store 16 into %stack.0) + ; CHECK: BL @may_throw_neon, csr_aarch64_aavpcs, implicit-def $lr, implicit $sp, implicit killed $q0, implicit-def $q0 + ; CHECK: EH_LABEL + ; CHECK: STRQui killed $q0, $sp, 0 :: (store 16 into %stack.1) + ; CHECK: B %bb.1 + ; CHECK: bb.1..Lcontinue: + ; CHECK: $q0 = LDRQui $sp, 0 :: (load 16 from %stack.1) + ; CHECK: $fp, $lr = frame-destroy LDPXi $sp, 36 :: (load 8 from %stack.3), (load 8 from %stack.2) + ; CHECK: $q9, $q8 = frame-destroy LDPQi $sp, 16 :: (load 16 from %stack.5), (load 16 from %stack.4) + ; CHECK: $q11, $q10 = frame-destroy LDPQi $sp, 14 :: (load 16 from %stack.7), (load 16 from %stack.6) + ; CHECK: $q13, $q12 = frame-destroy LDPQi $sp, 12 :: (load 16 from %stack.9), (load 16 from %stack.8) + ; CHECK: $q15, $q14 = frame-destroy LDPQi $sp, 10 :: (load 16 from %stack.11), (load 16 from %stack.10) + ; CHECK: $q17, $q16 = frame-destroy LDPQi $sp, 8 :: (load 16 from %stack.13), (load 16 from %stack.12) + ; CHECK: $q19, $q18 = frame-destroy LDPQi $sp, 6 :: (load 16 from %stack.15), (load 16 from %stack.14) + ; CHECK: $q21, $q20 = frame-destroy LDPQi $sp, 4 :: (load 16 from %stack.17), (load 16 from %stack.16) + ; CHECK: $q23, $q22 = frame-destroy LDPQi $sp, 2 :: (load 16 from %stack.19), (load 16 from %stack.18) + ; CHECK: $sp = frame-destroy ADDXri $sp, 304, 0 + ; CHECK: RET_ReallyLR implicit killed $q0 + ; CHECK: bb.2..Lunwind (landing-pad): + ; CHECK: liveins: $x0, $x1 + ; CHECK: EH_LABEL + ; CHECK: $q0 = LDRQui $sp, 1 :: (load 16 from %stack.0) + ; CHECK: $fp, $lr = frame-destroy LDPXi $sp, 36 :: (load 8 from %stack.3), (load 8 from %stack.2) + ; CHECK: $q9, $q8 = frame-destroy LDPQi $sp, 16 :: (load 16 from %stack.5), (load 16 from %stack.4) + ; CHECK: $q11, $q10 = frame-destroy LDPQi $sp, 14 :: (load 16 from %stack.7), (load 16 from %stack.6) + ; CHECK: $q13, $q12 = frame-destroy LDPQi $sp, 12 :: (load 16 from %stack.9), (load 16 from %stack.8) + ; CHECK: $q15, $q14 = frame-destroy LDPQi $sp, 10 :: (load 16 from %stack.11), (load 16 from %stack.10) + ; CHECK: $q17, $q16 = frame-destroy LDPQi $sp, 8 :: (load 16 from %stack.13), (load 16 from %stack.12) + ; CHECK: $q19, $q18 = frame-destroy LDPQi $sp, 6 :: (load 16 from %stack.15), (load 16 from %stack.14) + ; CHECK: $q21, $q20 = frame-destroy LDPQi $sp, 4 :: (load 16 from %stack.17), (load 16 from %stack.16) + ; CHECK: $q23, $q22 = frame-destroy LDPQi $sp, 2 :: (load 16 from %stack.19), (load 16 from %stack.18) + ; CHECK: $sp = frame-destroy ADDXri $sp, 304, 0 + ; CHECK: RET_ReallyLR implicit killed $q0 + bb.0 (%ir-block.0): + successors: %bb.1, %bb.2 + liveins: $q0 + + %0:fpr128 = COPY $q0 + EH_LABEL + ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp + $q0 = COPY %0 + BL @may_throw_neon, csr_aarch64_aavpcs, implicit-def $lr, implicit $sp, implicit $q0, implicit-def $q0 + %1:fpr128 = COPY $q0 + ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp + EH_LABEL + B %bb.1 + + bb.1..Lcontinue: + $q0 = COPY %1 + RET_ReallyLR implicit $q0 + + bb.2..Lunwind (landing-pad): + liveins: $x0, $x1 + + EH_LABEL + $q0 = COPY %0 + RET_ReallyLR implicit $q0 + +... diff --git a/llvm/test/CodeGen/AArch64/unwind-preserved.ll b/llvm/test/CodeGen/AArch64/unwind-preserved.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/unwind-preserved.ll @@ -0,0 +1,215 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --extra_scrub +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve -O0 -global-isel=0 -global-isel-abort=0 < %s | FileCheck %s +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve -O0 -global-isel=1 -global-isel-abort=0 < %s | FileCheck %s + +; Test that z0 is saved/restored, as the unwinder may only retain the low 64bits (d0). +define @invoke_callee_may_throw_sve( %v) personality i8 0 { +; CHECK-LABEL: invoke_callee_may_throw_sve: +; CHECK: .Lfunc_begin0: +; CHECK-NEXT: .cfi_startproc +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-18 +; CHECK-NEXT: str p15, [sp, #4, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p14, [sp, #5, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p13, [sp, #6, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p12, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p11, [sp, #8, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p10, [sp, #9, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p9, [sp, #10, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p8, [sp, #11, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p7, [sp, #12, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p6, [sp, #13, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p5, [sp, #14, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p4, [sp, #15, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str z23, [sp, #2, mul vl] // 16-byte Folded Spill +; CHECK-NEXT: str z22, [sp, #3, mul vl] // 16-byte Folded Spill +; CHECK-NEXT: str z21, [sp, #4, mul vl] // 16-byte Folded Spill +; CHECK-NEXT: str z20, [sp, #5, mul vl] // 16-byte Folded Spill +; CHECK-NEXT: str z19, [sp, #6, mul vl] // 16-byte Folded Spill +; CHECK-NEXT: str z18, [sp, #7, mul vl] // 16-byte Folded Spill +; CHECK-NEXT: str z17, [sp, #8, mul vl] // 16-byte Folded Spill +; CHECK-NEXT: str z16, [sp, #9, mul vl] // 16-byte Folded Spill +; CHECK-NEXT: str z15, [sp, #10, mul vl] // 16-byte Folded Spill +; CHECK-NEXT: str z14, [sp, #11, mul vl] // 16-byte Folded Spill +; CHECK-NEXT: str z13, [sp, #12, mul vl] // 16-byte Folded Spill +; CHECK-NEXT: str z12, [sp, #13, mul vl] // 16-byte Folded Spill +; CHECK-NEXT: str z11, [sp, #14, mul vl] // 16-byte Folded Spill +; CHECK-NEXT: str z10, [sp, #15, mul vl] // 16-byte Folded Spill +; CHECK-NEXT: str z9, [sp, #16, mul vl] // 16-byte Folded Spill +; CHECK-NEXT: str z8, [sp, #17, mul vl] // 16-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0xa0, 0x01, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 160 * VG +; CHECK-NEXT: .cfi_escape 0x10, 0x48, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 16 - 8 * VG +; CHECK-NEXT: .cfi_escape 0x10, 0x49, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x70, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d9 @ cfa - 16 - 16 * VG +; CHECK-NEXT: .cfi_escape 0x10, 0x4a, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x68, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d10 @ cfa - 16 - 24 * VG +; CHECK-NEXT: .cfi_escape 0x10, 0x4b, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x60, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d11 @ cfa - 16 - 32 * VG +; CHECK-NEXT: .cfi_escape 0x10, 0x4c, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x58, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d12 @ cfa - 16 - 40 * VG +; CHECK-NEXT: .cfi_escape 0x10, 0x4d, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x50, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d13 @ cfa - 16 - 48 * VG +; CHECK-NEXT: .cfi_escape 0x10, 0x4e, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x48, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d14 @ cfa - 16 - 56 * VG +; CHECK-NEXT: .cfi_escape 0x10, 0x4f, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x40, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d15 @ cfa - 16 - 64 * VG +; CHECK-NEXT: .cfi_offset w30, -8 +; CHECK-NEXT: .cfi_offset w29, -16 +; CHECK-NEXT: .Ltmp0: +; CHECK-NEXT: str z0, [sp, #1, mul vl] // 16-byte Folded Spill +; CHECK-NEXT: bl may_throw_sve +; CHECK-NEXT: .Ltmp1: +; CHECK-NEXT: str z0, [sp] // 16-byte Folded Spill +; CHECK-NEXT: b .LBB0_1 +; CHECK-NEXT: .LBB0_1: // %.Lcontinue +; CHECK-NEXT: ldr z0, [sp] // 16-byte Folded Reload +; CHECK-NEXT: addvl sp, sp, #2 +; CHECK-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr z23, [sp, #2, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr z22, [sp, #3, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr z21, [sp, #4, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr z20, [sp, #5, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr z19, [sp, #6, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr z18, [sp, #7, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr z17, [sp, #8, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr z16, [sp, #9, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr z15, [sp, #10, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr z14, [sp, #11, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr z13, [sp, #12, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr z12, [sp, #13, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr z11, [sp, #14, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr z10, [sp, #15, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr z9, [sp, #16, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr z8, [sp, #17, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: addvl sp, sp, #18 +; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB0_2: // %.Lunwind +; CHECK-NEXT: .Ltmp2: +; CHECK-NEXT: ldr z0, [sp, #1, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: addvl sp, sp, #2 +; CHECK-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr z23, [sp, #2, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr z22, [sp, #3, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr z21, [sp, #4, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr z20, [sp, #5, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr z19, [sp, #6, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr z18, [sp, #7, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr z17, [sp, #8, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr z16, [sp, #9, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr z15, [sp, #10, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr z14, [sp, #11, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr z13, [sp, #12, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr z12, [sp, #13, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr z11, [sp, #14, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr z10, [sp, #15, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr z9, [sp, #16, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr z8, [sp, #17, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: addvl sp, sp, #18 +; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload +; CHECK-NEXT: ret + %result = invoke @may_throw_sve( %v) to label %.Lcontinue unwind label %.Lunwind +.Lcontinue: + ret %result +.Lunwind: + %lp = landingpad { i8*, i32 } cleanup + ret %v; +} + +declare @may_throw_sve( %v); + + +; Test that q0 is saved/restored, as the unwinder may only retain the low 64bits (d0). +define aarch64_vector_pcs <4 x i32> @invoke_callee_may_throw_neon(<4 x i32> %v) personality i8 0 { +; CHECK-LABEL: invoke_callee_may_throw_neon: +; CHECK: .Lfunc_begin1: +; CHECK-NEXT: .cfi_startproc +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: sub sp, sp, #304 // =304 +; CHECK-NEXT: stp q23, q22, [sp, #32] // 32-byte Folded Spill +; CHECK-NEXT: stp q21, q20, [sp, #64] // 32-byte Folded Spill +; CHECK-NEXT: stp q19, q18, [sp, #96] // 32-byte Folded Spill +; CHECK-NEXT: stp q17, q16, [sp, #128] // 32-byte Folded Spill +; CHECK-NEXT: stp q15, q14, [sp, #160] // 32-byte Folded Spill +; CHECK-NEXT: stp q13, q12, [sp, #192] // 32-byte Folded Spill +; CHECK-NEXT: stp q11, q10, [sp, #224] // 32-byte Folded Spill +; CHECK-NEXT: stp q9, q8, [sp, #256] // 32-byte Folded Spill +; CHECK-NEXT: stp x29, x30, [sp, #288] // 16-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 304 +; CHECK-NEXT: .cfi_offset w30, -8 +; CHECK-NEXT: .cfi_offset w29, -16 +; CHECK-NEXT: .cfi_offset b8, -32 +; CHECK-NEXT: .cfi_offset b9, -48 +; CHECK-NEXT: .cfi_offset b10, -64 +; CHECK-NEXT: .cfi_offset b11, -80 +; CHECK-NEXT: .cfi_offset b12, -96 +; CHECK-NEXT: .cfi_offset b13, -112 +; CHECK-NEXT: .cfi_offset b14, -128 +; CHECK-NEXT: .cfi_offset b15, -144 +; CHECK-NEXT: .cfi_offset b16, -160 +; CHECK-NEXT: .cfi_offset b17, -176 +; CHECK-NEXT: .cfi_offset b18, -192 +; CHECK-NEXT: .cfi_offset b19, -208 +; CHECK-NEXT: .cfi_offset b20, -224 +; CHECK-NEXT: .cfi_offset b21, -240 +; CHECK-NEXT: .cfi_offset b22, -256 +; CHECK-NEXT: .cfi_offset b23, -272 +; CHECK-NEXT: .Ltmp3: +; CHECK-NEXT: str q0, [sp, #16] // 16-byte Folded Spill +; CHECK-NEXT: bl may_throw_neon +; CHECK-NEXT: .Ltmp4: +; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-NEXT: b .LBB1_1 +; CHECK-NEXT: .LBB1_1: // %.Lcontinue +; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-NEXT: ldp x29, x30, [sp, #288] // 16-byte Folded Reload +; CHECK-NEXT: ldp q9, q8, [sp, #256] // 32-byte Folded Reload +; CHECK-NEXT: ldp q11, q10, [sp, #224] // 32-byte Folded Reload +; CHECK-NEXT: ldp q13, q12, [sp, #192] // 32-byte Folded Reload +; CHECK-NEXT: ldp q15, q14, [sp, #160] // 32-byte Folded Reload +; CHECK-NEXT: ldp q17, q16, [sp, #128] // 32-byte Folded Reload +; CHECK-NEXT: ldp q19, q18, [sp, #96] // 32-byte Folded Reload +; CHECK-NEXT: ldp q21, q20, [sp, #64] // 32-byte Folded Reload +; CHECK-NEXT: ldp q23, q22, [sp, #32] // 32-byte Folded Reload +; CHECK-NEXT: add sp, sp, #304 // =304 +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB1_2: // %.Lunwind +; CHECK-NEXT: .Ltmp5: +; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: ldp x29, x30, [sp, #288] // 16-byte Folded Reload +; CHECK-NEXT: ldp q9, q8, [sp, #256] // 32-byte Folded Reload +; CHECK-NEXT: ldp q11, q10, [sp, #224] // 32-byte Folded Reload +; CHECK-NEXT: ldp q13, q12, [sp, #192] // 32-byte Folded Reload +; CHECK-NEXT: ldp q15, q14, [sp, #160] // 32-byte Folded Reload +; CHECK-NEXT: ldp q17, q16, [sp, #128] // 32-byte Folded Reload +; CHECK-NEXT: ldp q19, q18, [sp, #96] // 32-byte Folded Reload +; CHECK-NEXT: ldp q21, q20, [sp, #64] // 32-byte Folded Reload +; CHECK-NEXT: ldp q23, q22, [sp, #32] // 32-byte Folded Reload +; CHECK-NEXT: add sp, sp, #304 // =304 +; CHECK-NEXT: ret + %result = invoke aarch64_vector_pcs <4 x i32> @may_throw_neon(<4 x i32> %v) to label %.Lcontinue unwind label %.Lunwind +.Lcontinue: + ret <4 x i32> %result +.Lunwind: + %lp = landingpad { i8*, i32 } cleanup + ret <4 x i32> %v; +} + +declare aarch64_vector_pcs <4 x i32> @may_throw_neon(<4 x i32> %v);