Skip to content

Commit abac6e9

Browse files
committedJun 9, 2014
ARM: add VLA extension for WoA Itanium ABI
The armv7-windows-itanium environment is nearly identical to the MSVC ABI. It has a few divergences, mostly revolving around the use of the Itanium ABI for C++. VLA support is one of the extensions this environment adds. This adds support for proper VLA emission for this environment. This is somewhat similar to the handling for __chkstk emission on X86 and the large stack frame emission for ARM. The invocation style for __chkstk is still controlled via the -mcmodel flag to clang. Make an explicit note that this is an extension. llvm-svn: 210489
1 parent 44f60d0 commit abac6e9

File tree

5 files changed

+170
-1
lines changed

5 files changed

+170
-1
lines changed
 

‎llvm/docs/Extensions.rst

+14
Original file line numberDiff line numberDiff line change
@@ -195,3 +195,17 @@ range via a slight deviation. It will generate an indirect jump as follows:
195195
blx r12
196196
sub.w sp, sp, r4
197197
198+
Variable Length Arrays
199+
^^^^^^^^^^^^^^^^^^^^^^
200+
201+
The reference implementation (Microsoft Visual Studio 2012) does not permit the
202+
emission of Variable Length Arrays (VLAs).
203+
204+
The Windows ARM Itanium ABI extends the base ABI by adding support for emitting
205+
a dynamic stack allocation. When emitting a variable stack allocation, a call
206+
to ``__chkstk`` is emitted unconditionally to ensure that guard pages are set up
207+
properly. The emission of this stack probe is handled similarly to the
208+
standard stack probe emission.
209+
210+
The MSVC environment does not emit code for VLAs currently.
211+

‎llvm/lib/Target/ARM/ARMISelLowering.cpp

+106-1
Original file line numberDiff line numberDiff line change
@@ -710,7 +710,11 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
710710
setExceptionSelectorRegister(ARM::R1);
711711
}
712712

713-
setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);
713+
if (Subtarget->getTargetTriple().isWindowsItaniumEnvironment())
714+
setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom);
715+
else
716+
setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);
717+
714718
// ARMv6 Thumb1 (except for CPUs that support dmb / dsb) and earlier use
715719
// the default expansion.
716720
if (Subtarget->hasAnyDataBarrier() && !Subtarget->isThumb1Only()) {
@@ -983,6 +987,8 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
983987

984988
case ARMISD::PRELOAD: return "ARMISD::PRELOAD";
985989

990+
case ARMISD::WIN__CHKSTK: return "ARMISD:::WIN__CHKSTK";
991+
986992
case ARMISD::VCEQ: return "ARMISD::VCEQ";
987993
case ARMISD::VCEQZ: return "ARMISD::VCEQZ";
988994
case ARMISD::VCGE: return "ARMISD::VCGE";
@@ -6214,6 +6220,10 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
62146220
case ISD::FSINCOS: return LowerFSINCOS(Op, DAG);
62156221
case ISD::SDIVREM:
62166222
case ISD::UDIVREM: return LowerDivRem(Op, DAG);
6223+
case ISD::DYNAMIC_STACKALLOC:
6224+
if (Subtarget->getTargetTriple().isWindowsItaniumEnvironment())
6225+
return LowerDYNAMIC_STACKALLOC(Op, DAG);
6226+
llvm_unreachable("Don't know how to custom lower this!");
62176227
}
62186228
}
62196229

@@ -7112,6 +7122,73 @@ ARMTargetLowering::EmitStructByval(MachineInstr *MI,
71127122
return BB;
71137123
}
71147124

7125+
/// Custom inserter for ARM::WIN__CHKSTK: emits the call to __chkstk (direct
/// or through a register, depending on the code model), then subtracts R4
/// from SP, erases \p MI and returns \p MBB.
MachineBasicBlock *
7126+
ARMTargetLowering::EmitLowered__chkstk(MachineInstr *MI,
7127+
MachineBasicBlock *MBB) const {
7128+
const TargetMachine &TM = getTargetMachine();
7129+
const TargetInstrInfo &TII = *TM.getInstrInfo();
7130+
DebugLoc DL = MI->getDebugLoc();
7131+
7132+
assert(Subtarget->isTargetWindows() &&
7133+
"__chkstk is only supported on Windows");
7134+
assert(Subtarget->isThumb2() && "Windows on ARM requires Thumb-2 mode");
7135+
7136+
// __chkstk takes the number of words to allocate on the stack in R4, and
7137+
// returns the stack adjustment in number of bytes in R4. This will not
7138+
// clobber any other registers (other than the obvious lr).
7139+
//
7140+
// Although, technically, IP should be considered a register which may be
7141+
// clobbered, the call itself will not touch it. Windows on ARM is a pure
7142+
// thumb-2 environment, so there is no interworking required. As a result, we
7143+
// do not expect a veneer to be emitted by the linker, clobbering IP.
7144+
//
7145+
// Each module receives its own copy of __chkstk, so no import thunk is
7146+
// required, again, ensuring that IP is not clobbered.
7147+
//
7148+
// Finally, although some linkers may theoretically provide a trampoline for
7149+
// out of range calls (which is quite common due to a 32M range limitation of
7150+
// branches for Thumb), we can generate the long-call version via
7151+
// -mcmodel=large, alleviating the need for the trampoline which may clobber
7152+
// IP.
7153+
7154+
switch (TM.getCodeModel()) {
7155+
case CodeModel::Small:
7156+
case CodeModel::Medium:
7157+
case CodeModel::Default:
7158+
case CodeModel::Kernel:
// Direct call: tBL to the external symbol, with R4 as implicit use and def
// and R12 as an implicit dead def.
7159+
BuildMI(*MBB, MI, DL, TII.get(ARM::tBL))
7160+
.addImm((unsigned)ARMCC::AL).addReg(0)
7161+
.addExternalSymbol("__chkstk")
7162+
.addReg(ARM::R4, RegState::Implicit | RegState::Kill)
7163+
.addReg(ARM::R4, RegState::Implicit | RegState::Define)
7164+
.addReg(ARM::R12, RegState::Implicit | RegState::Define | RegState::Dead);
7165+
break;
7166+
case CodeModel::Large:
7167+
case CodeModel::JITDefault: {
// Long call: materialise the address of __chkstk into a fresh virtual
// register with t2MOVi32imm and call through it with tBLXr.
7168+
MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
7169+
unsigned Reg = MRI.createVirtualRegister(&ARM::rGPRRegClass);
7170+
7171+
BuildMI(*MBB, MI, DL, TII.get(ARM::t2MOVi32imm), Reg)
7172+
.addExternalSymbol("__chkstk");
7173+
BuildMI(*MBB, MI, DL, TII.get(ARM::tBLXr))
7174+
.addImm((unsigned)ARMCC::AL).addReg(0)
7175+
.addReg(Reg, RegState::Kill)
7176+
.addReg(ARM::R4, RegState::Implicit | RegState::Kill)
7177+
.addReg(ARM::R4, RegState::Implicit | RegState::Define)
7178+
.addReg(ARM::R12, RegState::Implicit | RegState::Define | RegState::Dead);
7179+
break;
7180+
}
7181+
}
7182+
7183+
// Apply the adjustment returned in R4 to the stack pointer: SP = SP - R4.
// NOTE(review): SP is already the destination passed to BuildMI, yet the
// first operand added afterwards is SP with RegState::Define rather than a
// plain use -- confirm whether a use/kill of SP was intended here.
AddDefaultCC(AddDefaultPred(BuildMI(*MBB, MI, DL, TII.get(ARM::t2SUBrr),
7184+
ARM::SP)
7185+
.addReg(ARM::SP, RegState::Define)
7186+
.addReg(ARM::R4, RegState::Kill)));
7187+
// The instruction passed in as MI has been replaced by the sequence above.
7188+
MI->eraseFromParent();
7189+
return MBB;
7190+
}
7191+
71157192
MachineBasicBlock *
71167193
ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
71177194
MachineBasicBlock *BB) const {
@@ -7361,6 +7438,8 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
73617438
case ARM::COPY_STRUCT_BYVAL_I32:
73627439
++NumLoopByVals;
73637440
return EmitStructByval(MI, BB);
7441+
case ARM::WIN__CHKSTK:
7442+
return EmitLowered__chkstk(MI, BB);
73647443
}
73657444
}
73667445

@@ -10481,6 +10560,32 @@ SDValue ARMTargetLowering::LowerDivRem(SDValue Op, SelectionDAG &DAG) const {
1048110560
return CallInfo.first;
1048210561
}
1048310562

10563+
/// Custom lowering for ISD::DYNAMIC_STACKALLOC: places the allocation size,
/// shifted right by two to form a word count, in R4, emits an
/// ARMISD::WIN__CHKSTK node, and returns the value of SP read back afterwards
/// merged with the resulting chain.
SDValue
10564+
ARMTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const {
10565+
assert(Subtarget->isTargetWindows() && "unsupported target platform");
10566+
SDLoc DL(Op);
10567+
10568+
// Get the inputs.
10569+
SDValue Chain = Op.getOperand(0);
10570+
SDValue Size = Op.getOperand(1);
10571+
// Convert the size to a count of 4-byte words (logical shift right by 2).
10572+
SDValue Words = DAG.getNode(ISD::SRL, DL, MVT::i32, Size,
10573+
DAG.getConstant(2, MVT::i32));
10574+
// Copy the word count into R4 and keep the glue result so the copy stays
// attached to the WIN__CHKSTK node created below.
10575+
SDValue Flag;
10576+
Chain = DAG.getCopyToReg(Chain, DL, ARM::R4, Words, Flag);
10577+
Flag = Chain.getValue(1);
10578+
// NOTE(review): the first result type requested here is MVT::i32, but the
// node's result is used as the chain below -- confirm whether MVT::Other
// was intended.
10579+
SDVTList NodeTys = DAG.getVTList(MVT::i32, MVT::Glue);
10580+
Chain = DAG.getNode(ARMISD::WIN__CHKSTK, DL, NodeTys, Chain, Flag);
10581+
// Read SP back after the WIN__CHKSTK node; this value is the lowered result.
10582+
SDValue NewSP = DAG.getCopyFromReg(Chain, DL, ARM::SP, MVT::i32);
10583+
Chain = NewSP.getValue(1);
10584+
// Results are returned as { value, chain }.
10585+
SDValue Ops[2] = { NewSP, Chain };
10586+
return DAG.getMergeValues(Ops, DL);
10587+
}
10588+
1048410589
bool
1048510590
ARMTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
1048610591
// The ARM target isn't yet aware of offsets.

‎llvm/lib/Target/ARM/ARMISelLowering.h

+6
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,8 @@ namespace llvm {
9595

9696
PRELOAD, // Preload
9797

98+
WIN__CHKSTK, // Windows' __chkstk call to do stack probing.
99+
98100
VCEQ, // Vector compare equal.
99101
VCEQZ, // Vector compare equal to zero.
100102
VCGE, // Vector compare greater than or equal.
@@ -470,6 +472,7 @@ namespace llvm {
470472
const ARMSubtarget *ST) const;
471473
SDValue LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const;
472474
SDValue LowerDivRem(SDValue Op, SelectionDAG &DAG) const;
475+
SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
473476

474477
unsigned getRegisterByName(const char* RegName, EVT VT) const override;
475478

@@ -578,6 +581,9 @@ namespace llvm {
578581

579582
MachineBasicBlock *EmitStructByval(MachineInstr *MI,
580583
MachineBasicBlock *MBB) const;
584+
585+
MachineBasicBlock *EmitLowered__chkstk(MachineInstr *MI,
586+
MachineBasicBlock *MBB) const;
581587
};
582588

583589
enum NEONModImmType {

‎llvm/lib/Target/ARM/ARMInstrInfo.td

+13
Original file line numberDiff line numberDiff line change
@@ -5093,6 +5093,19 @@ def MSRi : ABI<0b0011, (outs), (ins msr_mask:$mask, so_imm:$a), NoItinerary,
50935093
let Inst{11-0} = a;
50945094
}
50955095

5096+
// Dynamic stack allocation yields a __chkstk call for Windows targets. These calls
5097+
// are needed to probe the stack when allocating more than
5098+
// 4k bytes in one go. Touching the stack at 4K increments is necessary to
5099+
// ensure that the guard pages used by the OS virtual memory manager are
5100+
// allocated in correct sequence.
5101+
// The main point of having a separate instruction is the extra unmodelled effects
5102+
// (compared to ordinary calls) like stack pointer change.
5103+
5104+
def win__chkstk : SDNode<"ARMISD::WIN__CHKSTK", SDTNone,
5105+
[SDNPHasChain, SDNPSideEffect]>;
5106+
let usesCustomInserter = 1, Uses = [R4], Defs = [R4, SP] in
5107+
def WIN__CHKSTK : PseudoInst<(outs), (ins), NoItinerary, [(win__chkstk)]>;
5108+
50965109
//===----------------------------------------------------------------------===//
50975110
// TLS Instructions
50985111
//

‎llvm/test/CodeGen/ARM/Windows/vla.ll

+31
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
; RUN: llc -mtriple=thumbv7-windows-itanium -mcpu=cortex-a9 -o - %s \
2+
; RUN: | FileCheck %s -check-prefix CHECK-SMALL-CODE
3+
; RUN: llc -mtriple=thumbv7-windows-itanium -mcpu=cortex-a9 -code-model=large -o - %s \
4+
; RUN: | FileCheck %s -check-prefix CHECK-LARGE-CODE
5+
; RUN: llc -mtriple=thumbv7-windows-msvc -mcpu=cortex-a9 -o - %s \
6+
; RUN: | FileCheck %s -check-prefix CHECK-MSVC
7+
8+
define arm_aapcs_vfpcc i8 @function(i32 %sz, i32 %idx) {
9+
entry:
10+
%vla = alloca i8, i32 %sz, align 1
11+
%arrayidx = getelementptr inbounds i8* %vla, i32 %idx
12+
%0 = load volatile i8* %arrayidx, align 1
13+
ret i8 %0
14+
}
15+
16+
; CHECK-SMALL-CODE: adds [[R4:r[0-9]+]], #7
17+
; CHECK-SMALL-CODE: bic [[R4]], [[R4]], #7
18+
; CHECK-SMALL-CODE: lsrs r4, [[R4]], #2
19+
; CHECK-SMALL-CODE: bl __chkstk
20+
; CHECK-SMALL-CODE: sub.w sp, sp, r4
21+
22+
; CHECK-LARGE-CODE: adds [[R4:r[0-9]+]], #7
23+
; CHECK-LARGE-CODE: bic [[R4]], [[R4]], #7
24+
; CHECK-LARGE-CODE: lsrs r4, [[R4]], #2
25+
; CHECK-LARGE-CODE: movw [[IP:r[0-9]+]], :lower16:__chkstk
26+
; CHECK-LARGE-CODE: movt [[IP]], :upper16:__chkstk
27+
; CHECK-LARGE-CODE: blx [[IP]]
28+
; CHECK-LARGE-CODE: sub.w sp, sp, r4
29+
30+
; CHECK-MSVC-NOT: __chkstk
31+

0 commit comments

Comments
 (0)
Please sign in to comment.