Index: llvm/trunk/docs/Extensions.rst =================================================================== --- llvm/trunk/docs/Extensions.rst +++ llvm/trunk/docs/Extensions.rst @@ -288,3 +288,31 @@ The MSVC environment does not emit code for VLAs currently. +Windows on ARM64 +---------------- + +Stack Probe Emission +^^^^^^^^^^^^^^^^^^^^ + +The reference implementation (Microsoft Visual Studio 2017) emits stack probes +in the following fashion: + +.. code-block:: gas + + mov x15, #constant + bl __chkstk + sub sp, sp, x15, lsl #4 + +However, this limits the call range to 256 MiB (±128 MiB). In order to accommodate +larger binaries, LLVM supports the use of ``-mcmodel=large`` to allow an 8 GiB +(±4 GiB) range via a slight deviation. It will generate an indirect call as +follows: + +.. code-block:: gas + + mov x15, #constant + adrp x16, __chkstk + add x16, x16, :lo12:__chkstk + blr x16 + sub sp, sp, x15, lsl #4 + Index: llvm/trunk/lib/Target/AArch64/AArch64FrameLowering.cpp =================================================================== --- llvm/trunk/lib/Target/AArch64/AArch64FrameLowering.cpp +++ llvm/trunk/lib/Target/AArch64/AArch64FrameLowering.cpp @@ -97,6 +97,7 @@ #include "AArch64RegisterInfo.h" #include "AArch64Subtarget.h" #include "AArch64TargetMachine.h" +#include "MCTargetDesc/AArch64AddressingModes.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/LivePhysRegs.h" @@ -335,6 +336,22 @@ return findScratchNonCalleeSaveRegister(TmpMBB) != AArch64::NoRegister; } +static bool windowsRequiresStackProbe(MachineFunction &MF, + unsigned StackSizeInBytes) { + const AArch64Subtarget &Subtarget = MF.getSubtarget(); + if (!Subtarget.isTargetWindows()) + return false; + const Function &F = MF.getFunction(); + // TODO: When implementing stack protectors, take that into account + // for the probe threshold. 
+ unsigned StackProbeSize = 4096; + if (F.hasFnAttribute("stack-probe-size")) + F.getFnAttribute("stack-probe-size") + .getValueAsString() + .getAsInteger(0, StackProbeSize); + return StackSizeInBytes >= StackProbeSize; +} + bool AArch64FrameLowering::shouldCombineCSRLocalStackBump( MachineFunction &MF, unsigned StackBumpBytes) const { AArch64FunctionInfo *AFI = MF.getInfo(); @@ -347,7 +364,7 @@ // 512 is the maximum immediate for stp/ldp that will be used for // callee-save save/restores - if (StackBumpBytes >= 512) + if (StackBumpBytes >= 512 || windowsRequiresStackProbe(MF, StackBumpBytes)) return false; if (MFI.hasVarSizedObjects()) @@ -478,7 +495,7 @@ return; int NumBytes = (int)MFI.getStackSize(); - if (!AFI->hasStackFrame()) { + if (!AFI->hasStackFrame() && !windowsRequiresStackProbe(MF, NumBytes)) { assert(!HasFP && "unexpected function without stack frame but with FP"); // All of the stack allocation is for locals. @@ -550,6 +567,44 @@ MachineInstr::FrameSetup); } + if (windowsRequiresStackProbe(MF, NumBytes)) { + uint32_t NumWords = NumBytes >> 4; + + BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVi64imm), AArch64::X15) + .addImm(NumWords) + .setMIFlags(MachineInstr::FrameSetup); + + switch (MF.getTarget().getCodeModel()) { + case CodeModel::Small: + case CodeModel::Medium: + case CodeModel::Kernel: + BuildMI(MBB, MBBI, DL, TII->get(AArch64::BL)) + .addExternalSymbol("__chkstk") + .addReg(AArch64::X15, RegState::Implicit) + .setMIFlags(MachineInstr::FrameSetup); + break; + case CodeModel::Large: + BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVaddrEXT)) + .addReg(AArch64::X16, RegState::Define) + .addExternalSymbol("__chkstk") + .addExternalSymbol("__chkstk") + .setMIFlags(MachineInstr::FrameSetup); + + BuildMI(MBB, MBBI, DL, TII->get(AArch64::BLR)) + .addReg(AArch64::X16, RegState::Kill) + .addReg(AArch64::X15, RegState::Implicit | RegState::Define) + .setMIFlags(MachineInstr::FrameSetup); + break; + } + + BuildMI(MBB, MBBI, DL, TII->get(AArch64::SUBXrx64), 
AArch64::SP) + .addReg(AArch64::SP, RegState::Kill) + .addReg(AArch64::X15, RegState::Kill) + .addImm(AArch64_AM::getArithExtendImm(AArch64_AM::UXTX, 4)) + .setMIFlags(MachineInstr::FrameSetup); + NumBytes = 0; + } + // Allocate space for the rest of the frame. if (NumBytes) { const bool NeedsRealignment = RegInfo->needsStackRealignment(MF); @@ -1164,18 +1219,32 @@ unsigned UnspilledCSGPR = AArch64::NoRegister; unsigned UnspilledCSGPRPaired = AArch64::NoRegister; + MachineFrameInfo &MFI = MF.getFrameInfo(); + const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF); + + unsigned BasePointerReg = RegInfo->hasBasePointer(MF) + ? RegInfo->getBaseRegister() + : (unsigned)AArch64::NoRegister; + + unsigned SpillEstimate = SavedRegs.count(); + for (unsigned i = 0; CSRegs[i]; ++i) { + unsigned Reg = CSRegs[i]; + unsigned PairedReg = CSRegs[i ^ 1]; + if (Reg == BasePointerReg) + SpillEstimate++; + if (produceCompactUnwindFrame(MF) && !SavedRegs.test(PairedReg)) + SpillEstimate++; + } + SpillEstimate += 2; // Conservatively include FP+LR in the estimate + unsigned StackEstimate = MFI.estimateStackSize(MF) + 8 * SpillEstimate; + // The frame record needs to be created by saving the appropriate registers - if (hasFP(MF)) { + if (hasFP(MF) || windowsRequiresStackProbe(MF, StackEstimate)) { SavedRegs.set(AArch64::FP); SavedRegs.set(AArch64::LR); } - unsigned BasePointerReg = AArch64::NoRegister; - if (RegInfo->hasBasePointer(MF)) - BasePointerReg = RegInfo->getBaseRegister(); - unsigned ExtraCSSpill = 0; - const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF); // Figure out which callee-saved registers to save/restore. for (unsigned i = 0; CSRegs[i]; ++i) { const unsigned Reg = CSRegs[i]; @@ -1217,7 +1286,6 @@ // The CSR spill slots have not been allocated yet, so estimateStackSize // won't include them. 
- MachineFrameInfo &MFI = MF.getFrameInfo(); unsigned CFSize = MFI.estimateStackSize(MF) + 8 * NumRegsSpilled; DEBUG(dbgs() << "Estimated stack frame size: " << CFSize << " bytes.\n"); unsigned EstimatedStackSizeLimit = estimateRSStackSizeLimit(MF); Index: llvm/trunk/test/CodeGen/AArch64/chkstk.ll =================================================================== --- llvm/trunk/test/CodeGen/AArch64/chkstk.ll +++ llvm/trunk/test/CodeGen/AArch64/chkstk.ll @@ -0,0 +1,25 @@ +; RUN: llc -mtriple=aarch64-windows -verify-machineinstrs %s -o - \ +; RUN: | FileCheck -check-prefix CHECK-DEFAULT-CODE-MODEL %s + +; RUN: llc -mtriple=aarch64-windows -verify-machineinstrs -code-model=large %s -o - \ +; RUN: | FileCheck -check-prefix CHECK-LARGE-CODE-MODEL %s + +define void @check_watermark() { +entry: + %buffer = alloca [4096 x i8], align 1 + ret void +} + +; CHECK-DEFAULT-CODE-MODEL: check_watermark: +; CHECK-DEFAULT-CODE-MODEL-DAG: stp x29, x30, [sp +; CHECK-DEFAULT-CODE-MODEL-DAG: orr x15, xzr, #0x100 +; CHECK-DEFAULT-CODE-MODEL: bl __chkstk +; CHECK-DEFAULT-CODE-MODEL: sub sp, sp, x15, lsl #4 + +; CHECK-LARGE-CODE-MODEL: check_watermark: +; CHECK-LARGE-CODE-MODEL-DAG: stp x29, x30, [sp +; CHECK-LARGE-CODE-MODEL-DAG: orr x15, xzr, #0x100 +; CHECK-LARGE-CODE-MODEL-DAG: adrp x16, __chkstk +; CHECK-LARGE-CODE-MODEL-DAG: add x16, x16, __chkstk +; CHECK-LARGE-CODE-MODEL: blr x16 +; CHECK-LARGE-CODE-MODEL: sub sp, sp, x15, lsl #4