Index: docs/Extensions.rst =================================================================== --- docs/Extensions.rst +++ docs/Extensions.rst @@ -195,3 +195,17 @@ blx r12 sub.w sp, sp, r4 +Variable Length Arrays +^^^^^^^^^^^^^^^^^^^^^^ + +The reference implementation (Microsoft Visual Studio 2012) does not permit the +emission of Variable Length Arrays (VLAs). + +The Windows ARM Itanium ABI extends the base ABI by adding support for emitting +a dynamic stack allocation. When emitting a variable stack allocation, a call +to ``__chkstk`` is emitted unconditionally to ensure that guard pages are set up +properly. The emission of this stack probe is handled similarly to the +standard stack probe emission. + +The MSVC environment does not emit code for VLAs currently. + Index: lib/Target/ARM/ARMISelLowering.h =================================================================== --- lib/Target/ARM/ARMISelLowering.h +++ lib/Target/ARM/ARMISelLowering.h @@ -95,6 +95,8 @@ PRELOAD, // Preload + WIN__CHKSTK, // Windows' __chkstk call to do stack probing. + VCEQ, // Vector compare equal. VCEQZ, // Vector compare equal to zero. VCGE, // Vector compare greater than or equal. 
@@ -470,6 +472,7 @@ const ARMSubtarget *ST) const; SDValue LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const; SDValue LowerDivRem(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const; unsigned getRegisterByName(const char* RegName, EVT VT) const override; @@ -578,6 +581,9 @@ MachineBasicBlock *EmitStructByval(MachineInstr *MI, MachineBasicBlock *MBB) const; + + MachineBasicBlock *EmitLowered__chkstk(MachineInstr *MI, + MachineBasicBlock *MBB) const; }; enum NEONModImmType { Index: lib/Target/ARM/ARMISelLowering.cpp =================================================================== --- lib/Target/ARM/ARMISelLowering.cpp +++ lib/Target/ARM/ARMISelLowering.cpp @@ -710,7 +710,11 @@ setExceptionSelectorRegister(ARM::R1); } - setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand); + if (Subtarget->getTargetTriple().isWindowsItaniumEnvironment()) + setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom); + else + setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand); + // ARMv6 Thumb1 (except for CPUs that support dmb / dsb) and earlier use // the default expansion. 
if (Subtarget->hasAnyDataBarrier() && !Subtarget->isThumb1Only()) { @@ -983,6 +987,8 @@ case ARMISD::PRELOAD: return "ARMISD::PRELOAD"; + case ARMISD::WIN__CHKSTK: return "ARMISD::WIN__CHKSTK"; + case ARMISD::VCEQ: return "ARMISD::VCEQ"; case ARMISD::VCEQZ: return "ARMISD::VCEQZ"; case ARMISD::VCGE: return "ARMISD::VCGE"; @@ -6214,6 +6220,10 @@ case ISD::FSINCOS: return LowerFSINCOS(Op, DAG); case ISD::SDIVREM: case ISD::UDIVREM: return LowerDivRem(Op, DAG); + case ISD::DYNAMIC_STACKALLOC: + if (Subtarget->getTargetTriple().isWindowsItaniumEnvironment()) + return LowerDYNAMIC_STACKALLOC(Op, DAG); + llvm_unreachable("Don't know how to custom lower this!"); } } @@ -7113,6 +7123,59 @@ } MachineBasicBlock * +ARMTargetLowering::EmitLowered__chkstk(MachineInstr *MI, + MachineBasicBlock *MBB) const { + const TargetMachine &TM = getTargetMachine(); + const TargetInstrInfo &TII = *TM.getInstrInfo(); + DebugLoc DL = MI->getDebugLoc(); + + assert(Subtarget->isTargetWindows() && + "__chkstk is only supported on Windows"); + assert(Subtarget->isThumb2() && "Windows on ARM requires Thumb-2 mode"); + + // __chkstk takes the number of words to allocate on the stack in R4, and + // returns the stack adjustment in number of bytes in R4. This will not + // clobber any other registers (other than the obvious lr). 
+ + switch (TM.getCodeModel()) { + case CodeModel::Small: + case CodeModel::Medium: + case CodeModel::Default: + case CodeModel::Kernel: + BuildMI(*MBB, MI, DL, TII.get(ARM::tBL)) + .addImm((unsigned)ARMCC::AL).addReg(0) + .addExternalSymbol("__chkstk") + .addReg(ARM::R4, RegState::Implicit | RegState::Kill) + .addReg(ARM::R4, RegState::Implicit | RegState::Define) + .addReg(ARM::R12, RegState::Implicit | RegState::Define | RegState::Dead); + break; + case CodeModel::Large: + case CodeModel::JITDefault: { + MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); + unsigned Reg = MRI.createVirtualRegister(&ARM::rGPRRegClass); + + BuildMI(*MBB, MI, DL, TII.get(ARM::t2MOVi32imm), Reg) + .addExternalSymbol("__chkstk"); + BuildMI(*MBB, MI, DL, TII.get(ARM::tBLXr)) + .addImm((unsigned)ARMCC::AL).addReg(0) + .addReg(Reg, RegState::Kill) + .addReg(ARM::R4, RegState::Implicit | RegState::Kill) + .addReg(ARM::R4, RegState::Implicit | RegState::Define) + .addReg(ARM::R12, RegState::Implicit | RegState::Define | RegState::Dead); + break; + } + } + + AddDefaultCC(AddDefaultPred(BuildMI(*MBB, MI, DL, TII.get(ARM::t2SUBrr), + ARM::SP) + .addReg(ARM::SP) + .addReg(ARM::R4, RegState::Kill))); + + MI->eraseFromParent(); + return MBB; +} + +MachineBasicBlock * ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *BB) const { const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); @@ -7361,6 +7424,8 @@ case ARM::COPY_STRUCT_BYVAL_I32: ++NumLoopByVals; return EmitStructByval(MI, BB); + case ARM::WIN__CHKSTK: + return EmitLowered__chkstk(MI, BB); } } @@ -10481,6 +10546,32 @@ return CallInfo.first; } +SDValue +ARMTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const { + assert(Subtarget->isTargetWindows() && "unsupported target platform"); + SDLoc DL(Op); + + // Get the inputs. 
+ SDValue Chain = Op.getOperand(0); + SDValue Size = Op.getOperand(1); + + SDValue Words = DAG.getNode(ISD::SRL, DL, MVT::i32, Size, + DAG.getConstant(2, MVT::i32)); + + SDValue Flag; + Chain = DAG.getCopyToReg(Chain, DL, ARM::R4, Words, Flag); + Flag = Chain.getValue(1); + + SDVTList NodeTys = DAG.getVTList(MVT::i32, MVT::Glue); + Chain = DAG.getNode(ARMISD::WIN__CHKSTK, DL, NodeTys, Chain, Flag); + + SDValue NewSP = DAG.getCopyFromReg(Chain, DL, ARM::SP, MVT::i32); + Chain = NewSP.getValue(1); + + SDValue Ops[2] = { NewSP, Chain }; + return DAG.getMergeValues(Ops, DL); +} + bool ARMTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const { // The ARM target isn't yet aware of offsets. Index: lib/Target/ARM/ARMInstrInfo.td =================================================================== --- lib/Target/ARM/ARMInstrInfo.td +++ lib/Target/ARM/ARMInstrInfo.td @@ -5093,6 +5093,19 @@ let Inst{11-0} = a; } +// Dynamic stack allocation yields a _chkstk for Windows targets. These calls +// are needed to probe the stack when allocating more than +// 4k bytes in one go. Touching the stack at 4K increments is necessary to +// ensure that the guard pages used by the OS virtual memory manager are +// allocated in correct sequence. +// The main point of having separate instruction are extra unmodelled effects +// (compared to ordinary calls) like stack pointer change. 
+ +def win__chkstk : SDNode<"ARMISD::WIN__CHKSTK", SDTNone, + [SDNPHasChain, SDNPSideEffect]>; +let usesCustomInserter = 1, Uses = [R4], Defs = [R4, SP] in + def WIN__CHKSTK : PseudoInst<(outs), (ins), NoItinerary, [(win__chkstk)]>; + //===----------------------------------------------------------------------===// // TLS Instructions // Index: test/CodeGen/ARM/Windows/vla.ll =================================================================== --- /dev/null +++ test/CodeGen/ARM/Windows/vla.ll @@ -0,0 +1,31 @@ +; RUN: llc -mtriple=thumbv7-windows-itanium -mcpu=cortex-a9 -o - %s \ +; RUN: | FileCheck %s -check-prefix CHECK-SMALL-CODE +; RUN: llc -mtriple=thumbv7-windows-itanium -mcpu=cortex-a9 -code-model=large -o - %s \ +; RUN: | FileCheck %s -check-prefix CHECK-LARGE-CODE +; RUN: llc -mtriple=thumbv7-windows-msvc -mcpu=cortex-a9 -o - %s \ +; RUN: | FileCheck %s -check-prefix CHECK-MSVC + +define arm_aapcs_vfpcc i8 @function(i32 %sz, i32 %idx) { +entry: + %vla = alloca i8, i32 %sz, align 1 + %arrayidx = getelementptr inbounds i8* %vla, i32 %idx + %0 = load volatile i8* %arrayidx, align 1 + ret i8 %0 +} + +; CHECK-SMALL-CODE: adds [[R4:r[0-9]+]], #7 +; CHECK-SMALL-CODE: bic [[R4]], [[R4]], #7 +; CHECK-SMALL-CODE: lsrs r4, [[R4]], #2 +; CHECK-SMALL-CODE: bl __chkstk +; CHECK-SMALL-CODE: sub.w sp, sp, r4 + +; CHECK-LARGE-CODE: adds [[R4:r[0-9]+]], #7 +; CHECK-LARGE-CODE: bic [[R4]], [[R4]], #7 +; CHECK-LARGE-CODE: lsrs r4, [[R4]], #2 +; CHECK-LARGE-CODE: movw [[IP:r[0-9]+]], :lower16:__chkstk +; CHECK-LARGE-CODE: movt [[IP]], :upper16:__chkstk +; CHECK-LARGE-CODE: blx [[IP]] +; CHECK-LARGE-CODE: sub.w sp, sp, r4 + +; CHECK-MSVC-NOT: __chkstk +