Index: llvm/lib/Target/AArch64/AArch64CallingConvention.td =================================================================== --- llvm/lib/Target/AArch64/AArch64CallingConvention.td +++ llvm/lib/Target/AArch64/AArch64CallingConvention.td @@ -468,7 +468,7 @@ // CSR_Darwin_AArch64_CXX_TLS should be a subset of CSR_Darwin_AArch64_TLS. def CSR_Darwin_AArch64_CXX_TLS : CalleeSavedRegs<(add CSR_Darwin_AArch64_AAPCS, - (sub (sequence "X%u", 1, 28), X15, X16, X17, X18), + (sub (sequence "X%u", 1, 28), X9, X15, X16, X17, X18, X19), (sequence "D%u", 0, 31))>; // CSRs that are handled by prologue, epilogue. Index: llvm/lib/Target/AArch64/AArch64FrameLowering.cpp =================================================================== --- llvm/lib/Target/AArch64/AArch64FrameLowering.cpp +++ llvm/lib/Target/AArch64/AArch64FrameLowering.cpp @@ -2292,7 +2292,7 @@ // MachO's compact unwind format relies on all registers being stored in // pairs. assert((!produceCompactUnwindFrame(MF) || - CC == CallingConv::PreserveMost || + CC == CallingConv::PreserveMost || CC == CallingConv::CXX_FAST_TLS || (Count & 1) == 0) && "Odd number of callee-saved regs to spill!"); int ByteOffset = AFI->getCalleeSavedStackSize(); @@ -2387,7 +2387,7 @@ // MachO's compact unwind format relies on all registers being stored in // adjacent register pairs. assert((!produceCompactUnwindFrame(MF) || - CC == CallingConv::PreserveMost || + CC == CallingConv::PreserveMost || CC == CallingConv::CXX_FAST_TLS || (RPI.isPaired() && ((RPI.Reg1 == AArch64::LR && RPI.Reg2 == AArch64::FP) || RPI.Reg1 + 1 == RPI.Reg2))) && Index: llvm/test/CodeGen/AArch64/cxx-tlscc.ll =================================================================== --- llvm/test/CodeGen/AArch64/cxx-tlscc.ll +++ llvm/test/CodeGen/AArch64/cxx-tlscc.ll @@ -93,9 +93,9 @@ ; CHECK-O0: stp d5, d4 ; CHECK-O0: stp d3, d2 ; CHECK-O0: stp d1, d0 -; CHECK-O0: stp x14, x13 -; CHECK-O0: stp x12, x11 -; CHECK-O0: stp x10, x9 +; CHECK-O0: str x14 +; CHECK-O0: stp x13, x12 +; CHECK-O0: stp x11, x10 ; CHECK-O0: stp x8, x7 ; CHECK-O0: stp x6, x5 ; CHECK-O0: stp x4, x3 @@ -110,9 +110,9 @@ ; CHECK-O0: ldp x4, x3 ; CHECK-O0: ldp x6, x5 ; CHECK-O0: ldp x8, x7 -; CHECK-O0: ldp x10, x9 -; CHECK-O0: ldp x12, x11 -; CHECK-O0: ldp x14, x13 +; CHECK-O0: ldp x11, x10 +; CHECK-O0: ldp x13, x12 +; CHECK-O0: ldr x14 ; CHECK-O0: ldp d1, d0 ; CHECK-O0: ldp d3, d2 ; CHECK-O0: ldp d5, d4 @@ -220,5 +220,31 @@ ret void } +define cxx_fast_tlscc void @weird_prologue_regs(i32 %n) #1 { +; CHECK-LABEL: weird_prologue_regs: +; CHECK-NOT: str x9 +; CHECK-NOT: stp{{.*}}x9{{.*}}[ +; CHECK-NOT: str x19 +; CHECK-NOT: stp{{.*}}x19{{.*}}[ + +; CHECK: sub x9, sp, # +; CHECK: and sp, x9, #0x +; CHECK: mov x19, sp + +; CHECK-NOT: str x9 +; CHECK-NOT: stp{{.*}}x9{{.*}}[ +; CHECK-NOT: str x19 +; CHECK-NOT: stp{{.*}}x19{{.*}}[ + + %p0 = alloca i32, i32 200 + %p1 = alloca i32, align 32 + %p2 = alloca i32, i32 %n + call void @callee(i32* %p0) + call void @callee(i32* %p1) + call void @callee(i32* %p2) + ret void +} +declare void @callee(i32*) + attributes #0 = { nounwind "frame-pointer"="all" } attributes #1 = { nounwind }