diff --git a/llvm/lib/Target/AArch64/AArch64CallingConvention.td b/llvm/lib/Target/AArch64/AArch64CallingConvention.td
--- a/llvm/lib/Target/AArch64/AArch64CallingConvention.td
+++ b/llvm/lib/Target/AArch64/AArch64CallingConvention.td
@@ -384,6 +384,10 @@
                                            D8,  D9,  D10, D11,
                                            D12, D13, D14, D15)>;
 
+// A variant for treating X18 as callee saved, when interfacing with
+// code that needs X18 to be preserved.
+def CSR_AArch64_AAPCS_X18 : CalleeSavedRegs<(add X18, CSR_AArch64_AAPCS)>;
+
 // Win64 has unwinding codes for an (FP,LR) pair, save_fplr and save_fplr_x.
 // We put FP before LR, so that frame lowering logic generates (FP,LR) pairs,
 // and not (LR,FP) pairs.
diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -2389,6 +2389,7 @@
   TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
   const AArch64RegisterInfo *RegInfo = static_cast<const AArch64RegisterInfo *>(
       MF.getSubtarget().getRegisterInfo());
+  const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
   AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
   unsigned UnspilledCSGPR = AArch64::NoRegister;
   unsigned UnspilledCSGPRPaired = AArch64::NoRegister;
@@ -2437,6 +2438,16 @@
     }
   }
 
+  if (MF.getFunction().getCallingConv() == CallingConv::Win64 &&
+      !Subtarget.isTargetWindows()) {
+    // For Windows calling convention on a non-windows OS, where X18 is treated
+    // as reserved, back up X18 when entering non-windows code (marked with the
+    // Windows calling convention) and restore when returning regardless of
+    // whether the individual function uses it - it might call other functions
+    // that clobber it.
+    SavedRegs.set(AArch64::X18);
+  }
+
   // Calculates the callee saved stack size.
   unsigned CSStackSize = 0;
   unsigned SVECSStackSize = 0;
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -3958,6 +3958,13 @@
   CallingConv::ID CallerCC = CallerF.getCallingConv();
   bool CCMatch = CallerCC == CalleeCC;
 
+  // When using the Windows calling convention on a non-windows OS, we want
+  // to back up and restore X18 in such functions; we can't do a tail call
+  // from those functions.
+  if (CallerCC == CallingConv::Win64 && !Subtarget->isTargetWindows() &&
+      CalleeCC != CallingConv::Win64)
+    return false;
+
   // Byval parameters hand the function a pointer directly into the stack area
   // we want to reuse during a tail call. Working around this *is* possible (see
   // X86) but less efficient and uglier in LowerCall.
diff --git a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
--- a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
@@ -71,6 +71,10 @@
     return CSR_AArch64_AAPCS_SwiftError_SaveList;
   if (MF->getFunction().getCallingConv() == CallingConv::PreserveMost)
     return CSR_AArch64_RT_MostRegs_SaveList;
+  if (MF->getFunction().getCallingConv() == CallingConv::Win64)
+    // This is for OSes other than Windows; Windows is a separate case further
+    // above.
+    return CSR_AArch64_AAPCS_X18_SaveList;
   return CSR_AArch64_AAPCS_SaveList;
 }
 
diff --git a/llvm/test/CodeGen/AArch64/aarch64_win64cc_vararg.ll b/llvm/test/CodeGen/AArch64/aarch64_win64cc_vararg.ll
--- a/llvm/test/CodeGen/AArch64/aarch64_win64cc_vararg.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64_win64cc_vararg.ll
@@ -2,16 +2,18 @@
 
 define win64cc void @pass_va(i32 %count, ...) nounwind {
 entry:
-; CHECK: str x30, [sp, #-80]!
-; CHECK: add x8, sp, #24
-; CHECK: add x0, sp, #24
-; CHECK: stp x1, x2, [sp, #24]
-; CHECK: stp x3, x4, [sp, #40]
-; CHECK: stp x5, x6, [sp, #56]
-; CHECK: str x7, [sp, #72]
+; CHECK: sub sp, sp, #96
+; CHECK: add x8, sp, #40
+; CHECK: add x0, sp, #40
+; CHECK: stp x30, x18, [sp, #16]
+; CHECK: stp x1, x2, [sp, #40]
+; CHECK: stp x3, x4, [sp, #56]
+; CHECK: stp x5, x6, [sp, #72]
+; CHECK: str x7, [sp, #88]
 ; CHECK: str x8, [sp, #8]
 ; CHECK: bl other_func
-; CHECK: ldr x30, [sp], #80
+; CHECK: ldp x30, x18, [sp, #16]
+; CHECK: add sp, sp, #96
 ; CHECK: ret
   %ap = alloca i8*, align 8
   %ap1 = bitcast i8** %ap to i8*
@@ -27,11 +29,11 @@
 declare void @llvm.va_copy(i8*, i8*) nounwind
 
 ; CHECK-LABEL: f9:
-; CHECK: sub sp, sp, #16
+; CHECK: str x18, [sp, #-16]!
 ; CHECK: add x8, sp, #24
 ; CHECK: add x0, sp, #24
 ; CHECK: str x8, [sp, #8]
-; CHECK: add sp, sp, #16
+; CHECK: ldr x18, [sp], #16
 ; CHECK: ret
 define win64cc i8* @f9(i64 %a0, i64 %a1, i64 %a2, i64 %a3, i64 %a4, i64 %a5, i64 %a6, i64 %a7, i64 %a8, ...) nounwind {
 entry:
@@ -43,11 +45,11 @@
 }
 
 ; CHECK-LABEL: f8:
-; CHECK: sub sp, sp, #16
+; CHECK: str x18, [sp, #-16]!
 ; CHECK: add x8, sp, #16
 ; CHECK: add x0, sp, #16
 ; CHECK: str x8, [sp, #8]
-; CHECK: add sp, sp, #16
+; CHECK: ldr x18, [sp], #16
 ; CHECK: ret
 define win64cc i8* @f8(i64 %a0, i64 %a1, i64 %a2, i64 %a3, i64 %a4, i64 %a5, i64 %a6, i64 %a7, ...) nounwind {
 entry:
@@ -59,12 +61,12 @@
 }
 
 ; CHECK-LABEL: f7:
-; CHECK: sub sp, sp, #32
+; CHECK: str x18, [sp, #-32]!
 ; CHECK: add x8, sp, #24
 ; CHECK: str x7, [sp, #24]
 ; CHECK: add x0, sp, #24
 ; CHECK: str x8, [sp, #8]
-; CHECK: add sp, sp, #32
+; CHECK: ldr x18, [sp], #32
 ; CHECK: ret
 define win64cc i8* @f7(i64 %a0, i64 %a1, i64 %a2, i64 %a3, i64 %a4, i64 %a5, i64 %a6, ...) nounwind {
 entry:
diff --git a/llvm/test/CodeGen/AArch64/win64cc-backup-x18.ll b/llvm/test/CodeGen/AArch64/win64cc-backup-x18.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/win64cc-backup-x18.ll
@@ -0,0 +1,26 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+
+;; Testing that x18 is backed up and restored, and that x29 (if used) still
+;; points to the x29,x30 pair on the stack.
+
+; RUN: llc < %s -mtriple=aarch64-linux-gnu --frame-pointer=non-leaf | FileCheck %s
+; RUN: llc < %s -mtriple=aarch64-linux-gnu --frame-pointer=non-leaf -mattr=+reserve-x18 | FileCheck %s
+
+declare dso_local void @other()
+
+define dso_local win64cc void @func() #0 {
+; CHECK-LABEL: func:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: stp x29, x30, [sp, #-32]! // 16-byte Folded Spill
+; CHECK-NEXT: str x18, [sp, #16] // 8-byte Folded Spill
+; CHECK-NEXT: mov x29, sp
+; CHECK-NEXT: bl other
+; CHECK-NEXT: ldr x18, [sp, #16] // 8-byte Folded Reload
+; CHECK-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload
+; CHECK-NEXT: ret
+entry:
+  tail call void @other()
+  ret void
+}
+
+attributes #0 = { nounwind }