[AArch64] Back up and restore X18 in win64cc functions on non-Windows targets

Summary of the hunks below:
  * AArch64CallingConvention.td: add CSR_AArch64_AAPCS_X18, i.e. the
    CSR_AArch64_AAPCS callee-saved set plus X18.
  * AArch64FrameLowering.cpp: for functions using CallingConv::Win64 on a
    non-Windows target, always mark X18 as saved, whether or not the function
    itself uses it.
  * AArch64ISelLowering.cpp: on a non-Windows target, refuse tail calls from a
    Win64-convention caller to a callee with a different convention, so the
    X18 restore in the caller's epilogue still runs.
  * AArch64RegisterInfo.cpp: return CSR_AArch64_AAPCS_X18_SaveList for
    CallingConv::Win64 in the fall-through (non-Windows) path.
  * win64cc-backup-x18.ll: new codegen test for the spill/reload of x18.

Note: leading indentation was lost when this patch was extracted and has been
reconstructed following LLVM style. If a context line fails to match, apply
with whitespace-insensitive matching (e.g. `patch -l`).

Index: llvm/lib/Target/AArch64/AArch64CallingConvention.td
===================================================================
--- llvm/lib/Target/AArch64/AArch64CallingConvention.td
+++ llvm/lib/Target/AArch64/AArch64CallingConvention.td
@@ -384,6 +384,10 @@
                                            D8, D9, D10, D11,
                                            D12, D13, D14, D15)>;
 
+// A variant for treating X18 as callee saved, when interfacing with
+// code that needs X18 to be preserved.
+def CSR_AArch64_AAPCS_X18 : CalleeSavedRegs<(add X18, CSR_AArch64_AAPCS)>;
+
 // Win64 has unwinding codes for an (FP,LR) pair, save_fplr and save_fplr_x.
 // We put FP before LR, so that frame lowering logic generates (FP,LR) pairs,
 // and not (LR,FP) pairs.
Index: llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -2389,6 +2389,7 @@
   TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
   const AArch64RegisterInfo *RegInfo = static_cast<const AArch64RegisterInfo *>(
       MF.getSubtarget().getRegisterInfo());
+  const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
   AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
   unsigned UnspilledCSGPR = AArch64::NoRegister;
   unsigned UnspilledCSGPRPaired = AArch64::NoRegister;
@@ -2437,6 +2438,16 @@
     }
   }
 
+  if (MF.getFunction().getCallingConv() == CallingConv::Win64 &&
+      !Subtarget.isTargetWindows()) {
+    // For Windows calling convention on a non-Windows OS, where X18 is treated
+    // as reserved, back up X18 when entering non-Windows code (marked with the
+    // Windows calling convention) and restore when returning regardless of
+    // whether the individual function uses it - it might call other functions
+    // that clobber it.
+    SavedRegs.set(AArch64::X18);
+  }
+
   // Calculates the callee saved stack size.
   unsigned CSStackSize = 0;
   unsigned SVECSStackSize = 0;
Index: llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -3958,6 +3958,13 @@
   CallingConv::ID CallerCC = CallerF.getCallingConv();
   bool CCMatch = CallerCC == CalleeCC;
 
+  // When using the Windows calling convention on a non-Windows OS, we want
+  // to back up and restore X18 in such functions; we can't do a tail call
+  // from those functions.
+  if (CallerCC == CallingConv::Win64 && !Subtarget->isTargetWindows() &&
+      CalleeCC != CallingConv::Win64)
+    return false;
+
   // Byval parameters hand the function a pointer directly into the stack area
   // we want to reuse during a tail call. Working around this *is* possible (see
   // X86) but less efficient and uglier in LowerCall.
Index: llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
+++ llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
@@ -71,6 +71,10 @@
     return CSR_AArch64_AAPCS_SwiftError_SaveList;
   if (MF->getFunction().getCallingConv() == CallingConv::PreserveMost)
     return CSR_AArch64_RT_MostRegs_SaveList;
+  if (MF->getFunction().getCallingConv() == CallingConv::Win64)
+    // This is for OSes other than Windows; Windows is a separate case further
+    // above.
+    return CSR_AArch64_AAPCS_X18_SaveList;
   return CSR_AArch64_AAPCS_SaveList;
 }
 
Index: llvm/test/CodeGen/AArch64/win64cc-backup-x18.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AArch64/win64cc-backup-x18.ll
@@ -0,0 +1,25 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+
+;; Testing that x18 is backed up and restored, and that x29 (if used) still
+;; points to the x29,x30 pair on the stack.
+
+; RUN: llc < %s -mtriple=aarch64-linux-gnu --frame-pointer=non-leaf | FileCheck %s
+
+declare dso_local void @other()
+
+define dso_local win64cc void @func() #0 {
+; CHECK-LABEL: func:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: stp x29, x30, [sp, #-32]! // 16-byte Folded Spill
+; CHECK-NEXT: str x18, [sp, #16] // 8-byte Folded Spill
+; CHECK-NEXT: mov x29, sp
+; CHECK-NEXT: bl other
+; CHECK-NEXT: ldr x18, [sp, #16] // 8-byte Folded Reload
+; CHECK-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload
+; CHECK-NEXT: ret
+entry:
+  tail call void @other()
+  ret void
+}
+
+attributes #0 = { nounwind }