Index: lib/Target/AArch64/AArch64ISelLowering.cpp
===================================================================
--- lib/Target/AArch64/AArch64ISelLowering.cpp
+++ lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -7510,6 +7510,21 @@
   unsigned Align = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
   EVT VT = Node->getValueType(0);
 
+  // With "no-stack-arg-probe" skip the __chkstk call: move SP down by Size
+  // directly and, when an alignment operand is given, mask SP with -Align.
+  if (DAG.getMachineFunction().getFunction().hasFnAttribute(
+          "no-stack-arg-probe")) {
+    SDValue SP = DAG.getCopyFromReg(Chain, dl, AArch64::SP, MVT::i64);
+    Chain = SP.getValue(1);
+    SP = DAG.getNode(ISD::SUB, dl, MVT::i64, SP, Size);
+    if (Align)
+      SP = DAG.getNode(ISD::AND, dl, MVT::i64, SP,
+                       DAG.getConstant(-(uint64_t)Align, dl, MVT::i64));
+    Chain = DAG.getCopyToReg(Chain, dl, AArch64::SP, SP);
+    SDValue Ops[2] = {SP, Chain};
+    return DAG.getMergeValues(Ops, dl);
+  }
+
   Chain = DAG.getCALLSEQ_START(Chain, 0, 0, dl);
 
   Chain = LowerWindowsDYNAMIC_STACKALLOC(Op, Chain, Size, DAG);
Index: lib/Target/ARM/ARMISelLowering.cpp
===================================================================
--- lib/Target/ARM/ARMISelLowering.cpp
+++ lib/Target/ARM/ARMISelLowering.cpp
@@ -13950,6 +13950,22 @@
   SDValue Chain = Op.getOperand(0);
   SDValue Size = Op.getOperand(1);
 
+  // With "no-stack-arg-probe" skip the __chkstk call: move SP down by Size
+  // directly and, when an alignment operand is given, mask SP with -Align.
+  if (DAG.getMachineFunction().getFunction().hasFnAttribute(
+          "no-stack-arg-probe")) {
+    unsigned Align = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
+    SDValue SP = DAG.getCopyFromReg(Chain, DL, ARM::SP, MVT::i32);
+    Chain = SP.getValue(1);
+    SP = DAG.getNode(ISD::SUB, DL, MVT::i32, SP, Size);
+    if (Align)
+      SP = DAG.getNode(ISD::AND, DL, MVT::i32, SP,
+                       DAG.getConstant(-(uint64_t)Align, DL, MVT::i32));
+    Chain = DAG.getCopyToReg(Chain, DL, ARM::SP, SP);
+    SDValue Ops[2] = { SP, Chain };
+    return DAG.getMergeValues(Ops, DL);
+  }
+
   SDValue Words = DAG.getNode(ISD::SRL, DL, MVT::i32, Size,
                               DAG.getConstant(2, DL, MVT::i32));
 
Index: test/CodeGen/AArch64/no-stack-arg-probe.ll
===================================================================
--- /dev/null
+++ test/CodeGen/AArch64/no-stack-arg-probe.ll
@@ -0,0 +1,12 @@
+; RUN: llc -mtriple=aarch64-windows -verify-machineinstrs %s -o - \
+; RUN:   | FileCheck %s
+
+define void @check_watermark() "no-stack-arg-probe" {
+entry:
+  %buffer = alloca [4096 x i8], align 1
+  ret void
+}
+
+; CHECK: check_watermark:
+; CHECK: sub sp, sp, #1, lsl #12
+; CHECK-NOT: bl __chkstk
Index: test/CodeGen/AArch64/win-alloca-no-stack-probe.ll
===================================================================
--- /dev/null
+++ test/CodeGen/AArch64/win-alloca-no-stack-probe.ll
@@ -0,0 +1,21 @@
+; RUN: llc -mtriple aarch64-windows -verify-machineinstrs -filetype asm -o - %s | FileCheck %s
+
+define void @func(i64 %a) "no-stack-arg-probe" {
+entry:
+  %0 = alloca i8, i64 %a, align 16
+  call void @func2(i8* nonnull %0)
+  ret void
+}
+
+declare void @func2(i8*)
+
+; Functions marked "no-stack-arg-probe" must lower a dynamic alloca to an
+; inline adjustment of sp; check that sequence and that no call to __chkstk
+; is emitted.
+
+; CHECK: add [[REG1:x[0-9]+]], x0, #15
+; CHECK-NOT: bl __chkstk
+; CHECK: mov [[REG2:x[0-9]+]], sp
+; CHECK: and [[REG1]], [[REG1]], #0xfffffffffffffff0
+; CHECK: sub [[REG3:x[0-9]+]], [[REG2]], [[REG1]]
+; CHECK: mov sp, [[REG3]]
Index: test/CodeGen/ARM/Windows/alloca-no-stack-arg-probe.ll
===================================================================
--- /dev/null
+++ test/CodeGen/ARM/Windows/alloca-no-stack-arg-probe.ll
@@ -0,0 +1,21 @@
+; RUN: llc -mtriple thumbv7-windows -filetype asm -o - %s | FileCheck %s
+
+declare arm_aapcs_vfpcc i32 @num_entries()
+
+define arm_aapcs_vfpcc void @test___builtin_alloca() "no-stack-arg-probe" {
+entry:
+  %array = alloca i8*, align 4
+  %call = call arm_aapcs_vfpcc i32 @num_entries()
+  %mul = mul i32 4, %call
+  %0 = alloca i8, i32 %mul
+  store i8* %0, i8** %array, align 4
+  ret void
+}
+
+; CHECK: bl num_entries
+; CHECK: movs [[R1:r[0-9]+]], #7
+; CHECK: add.w [[R0:r[0-9]+]], [[R1]], [[R0]], lsl #2
+; CHECK: bic [[R0]], [[R0]], #7
+; CHECK-NOT: bl __chkstk
+; CHECK: sub.w [[R0]], sp, [[R0]]
+; CHECK: mov sp, [[R0]]
Index: test/CodeGen/ARM/Windows/chkstk-no-stack-arg-probe.ll
===================================================================
--- /dev/null
+++ test/CodeGen/ARM/Windows/chkstk-no-stack-arg-probe.ll
@@ -0,0 +1,12 @@
+; RUN: llc -mtriple=thumbv7-windows -verify-machineinstrs %s -o - \
+; RUN:   | FileCheck %s
+
+define arm_aapcs_vfpcc void @check_watermark() "no-stack-arg-probe" {
+entry:
+  %buffer = alloca [4096 x i8], align 1
+  ret void
+}
+
+; CHECK: check_watermark:
+; CHECK-NOT: bl __chkstk
+; CHECK: sub.w sp, sp, #4096