AArch64 Windows dynamic alloca: fold the SP update into
LowerWindowsDYNAMIC_STACKALLOC.

Summary of what the hunks below show:
  * LowerWindowsDYNAMIC_STACKALLOC gains a trailing `MaybeAlign Align`
    parameter (header and definition are updated together).
  * The helper now opens the call sequence itself (getCALLSEQ_START) and
    closes it (getCALLSEQ_END), instead of the caller doing so.
  * The helper no longer rewrites Size with an ISD::SHL by 4 and returns
    only the chain. It reads SP, subtracts Size through an
    AArch64::SUBXrx64 machine node whose extend immediate is
    getArithExtendImm(UXTX, 4), applies the optional alignment mask
    (AND with -Align), writes SP back, and returns the merged values
    {SP, Chain}.
  * The caller in the third .cpp hunk (presumably LowerDYNAMIC_STACKALLOC;
    its header is outside the diff context) drops its own copy of that
    sequence and simply returns the helper's result.
  * Tests: the `mov xN, sp` that used to precede the subtraction is gone;
    the expected instruction is now `sub xN, sp, x15, lsl #4`.

Review note: this text was reconstructed from a whitespace-collapsed copy.
Hunk line counts have been checked against every @@ header, but the
indentation inside the hunks is a best-effort reconstruction and the test
lines keep the single spaces seen in that copy. If a hunk fails to match,
compare against the tree or apply with whitespace-insensitive matching
(`patch -l`).

Index: llvm/lib/Target/AArch64/AArch64ISelLowering.h
===================================================================
--- llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -1020,8 +1020,8 @@
   SDValue LowerATOMIC_LOAD_AND(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerWindowsDYNAMIC_STACKALLOC(SDValue Op, SDValue Chain,
-                                         SDValue &Size,
-                                         SelectionDAG &DAG) const;
+                                         SDValue &Size, SelectionDAG &DAG,
+                                         MaybeAlign Align) const;
 
   SDValue LowerSVEStructLoad(unsigned Intrinsic, ArrayRef<SDValue> LoadOps,
                              EVT VT, SelectionDAG &DAG, const SDLoc &DL) const;
Index: llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -12216,8 +12216,12 @@
 }
 
 SDValue AArch64TargetLowering::LowerWindowsDYNAMIC_STACKALLOC(
-    SDValue Op, SDValue Chain, SDValue &Size, SelectionDAG &DAG) const {
+    SDValue Op, SDValue Chain, SDValue &Size, SelectionDAG &DAG,
+    MaybeAlign Align) const {
   SDLoc dl(Op);
+
+  Chain = DAG.getCALLSEQ_START(Chain, 0, 0, dl);
+
   EVT PtrVT = getPointerTy(DAG.getDataLayout());
   SDValue Callee = DAG.getTargetExternalSymbol("__chkstk", PtrVT, 0);
 
@@ -12238,9 +12242,25 @@
   // from X15 here doesn't work at -O0, since it thinks that X15 is undefined
   // here.
 
-  Size = DAG.getNode(ISD::SHL, dl, MVT::i64, Size,
-                     DAG.getConstant(4, dl, MVT::i64));
-  return Chain;
+  SDValue ArithExtImm = DAG.getTargetConstant(
+      AArch64_AM::getArithExtendImm(AArch64_AM::UXTX, 4), dl, MVT::i32);
+
+  SDValue SP = DAG.getCopyFromReg(Chain, dl, AArch64::SP, MVT::i64);
+  Chain = SP.getValue(1);
+  SP = SDValue(DAG.getMachineNode(AArch64::SUBXrx64, dl, MVT::i64, SP, Size,
+                                  ArithExtImm),
+               0);
+  if (Align)
+    SP = DAG.getNode(ISD::AND, dl, MVT::i64, SP,
+                     DAG.getConstant(-(uint64_t)Align->value(), dl, MVT::i64));
+  Chain = DAG.getCopyToReg(Chain, dl, AArch64::SP, SP);
+
+  Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(0, dl, true),
+                             DAG.getIntPtrConstant(0, dl, true), SDValue(), dl);
+
+  SDValue Ops[2] = {SP, Chain};
+
+  return DAG.getMergeValues(Ops, dl);
 }
 
 SDValue
@@ -12270,23 +12290,7 @@
     return DAG.getMergeValues(Ops, dl);
   }
 
-  Chain = DAG.getCALLSEQ_START(Chain, 0, 0, dl);
-
-  Chain = LowerWindowsDYNAMIC_STACKALLOC(Op, Chain, Size, DAG);
-
-  SDValue SP = DAG.getCopyFromReg(Chain, dl, AArch64::SP, MVT::i64);
-  Chain = SP.getValue(1);
-  SP = DAG.getNode(ISD::SUB, dl, MVT::i64, SP, Size);
-  if (Align)
-    SP = DAG.getNode(ISD::AND, dl, VT, SP.getValue(0),
-                     DAG.getConstant(-(uint64_t)Align->value(), dl, VT));
-  Chain = DAG.getCopyToReg(Chain, dl, AArch64::SP, SP);
-
-  Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(0, dl, true),
-                             DAG.getIntPtrConstant(0, dl, true), SDValue(), dl);
-
-  SDValue Ops[2] = {SP, Chain};
-  return DAG.getMergeValues(Ops, dl);
+  return LowerWindowsDYNAMIC_STACKALLOC(Op, Chain, Size, DAG, Align);
 }
 
 SDValue AArch64TargetLowering::LowerVSCALE(SDValue Op,
Index: llvm/test/CodeGen/AArch64/win-alloca.ll
===================================================================
--- llvm/test/CodeGen/AArch64/win-alloca.ll
+++ llvm/test/CodeGen/AArch64/win-alloca.ll
@@ -17,7 +17,6 @@
 ; CHECK: add [[REG1:x[0-9]+]], x0, #15
 ; CHECK-OPT: lsr x15, [[REG1]], #4
 ; CHECK: bl __chkstk
-; CHECK: mov [[REG2:x[0-9]+]], sp
-; CHECK-OPT: sub [[REG3:x[0-9]+]], [[REG2]], x15, lsl #4
+; CHECK-OPT: sub [[REG3:x[0-9]+]], sp, x15, lsl #4
 ; CHECK-OPT: mov sp, [[REG3]]
 ; CHECK: bl func2
Index: llvm/test/CodeGen/AArch64/win64_vararg.ll
===================================================================
--- llvm/test/CodeGen/AArch64/win64_vararg.ll
+++ llvm/test/CodeGen/AArch64/win64_vararg.ll
@@ -197,8 +197,7 @@
 ; CHECK-NEXT: stp x5, x6, [x29, #48]
 ; CHECK-NEXT: str x7, [x29, #64]
 ; CHECK-NEXT: bl __chkstk
-; CHECK-NEXT: mov x8, sp
-; CHECK-NEXT: sub x20, x8, x15, lsl #4
+; CHECK-NEXT: sub x20, sp, x15, lsl #4
 ; CHECK-NEXT: mov sp, x20
 ; CHECK-NEXT: ldr x21, [x29, #16]
 ; CHECK-NEXT: sxtw x22, w0