diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
--- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -69,6 +69,7 @@
   bool tryMLAV64LaneV128(SDNode *N);
   bool tryMULLV64LaneV128(unsigned IntNo, SDNode *N);
   bool SelectArithExtendedRegister(SDValue N, SDValue &Reg, SDValue &Shift);
+  bool SelectArithUXTXRegister(SDValue N, SDValue &Reg, SDValue &Shift);
   bool SelectArithImmed(SDValue N, SDValue &Val, SDValue &Shift);
   bool SelectNegArithImmed(SDValue N, SDValue &Val, SDValue &Shift);
   bool SelectArithShiftedRegister(SDValue N, SDValue &Reg, SDValue &Shift) {
@@ -893,6 +894,30 @@
   return isWorthFolding(N);
 }
 
+/// SelectArithUXTXRegister - Select a "UXTX register" operand. This
+/// operand is used by instructions that take SP as an operand.
+bool AArch64DAGToDAGISel::SelectArithUXTXRegister(SDValue N, SDValue &Reg,
+                                                  SDValue &Shift) {
+  unsigned ShiftVal = 0;
+  AArch64_AM::ShiftExtendType Ext;
+
+  if (N.getOpcode() != ISD::SHL)
+    return false;
+
+  ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
+  if (!CSD)
+    return false;
+  ShiftVal = CSD->getZExtValue();
+  if (ShiftVal > 4)
+    return false;
+
+  Ext = AArch64_AM::UXTX;
+  Reg = N.getOperand(0);
+  Shift = CurDAG->getTargetConstant(getArithExtendImm(Ext, ShiftVal), SDLoc(N),
+                                    MVT::i32);
+  return isWorthFolding(N);
+}
+
 /// If there's a use of this ADDlow that's not itself a load/store then we'll
 /// need to create a real ADD instruction from it anyway and there's no point in
 /// folding it into the mem op. Theoretically, it shouldn't matter, but there's
diff --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
--- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
@@ -1168,6 +1168,8 @@
     GIComplexOperandMatcher<s64, "selectArithExtendedRegister">,
     GIComplexPatternEquiv<arith_extended_reg32to64_i64>;
 
+def arith_uxtx : ComplexPattern<i64, 2, "SelectArithUXTXRegister", []>;
+
 // Floating-point immediate.
 def fpimm16XForm : SDNodeXForm<fpimm, [{
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ ... @@
 defm SUBS : AddSubS<1, "subs", AArch64sub_flag, "cmp", "adds", "cmn">;
 
+def copyFromSP: PatLeaf<(i64 GPR64:$src), [{
+  return N->getOpcode() == ISD::CopyFromReg &&
+         cast<RegisterSDNode>(N->getOperand(1))->getReg() == AArch64::SP;
+}]>;
+
 // Use SUBS instead of SUB to enable CSE between SUBS and SUB.
 def : Pat<(sub GPR32sp:$Rn, addsub_shifted_imm32:$imm),
           (SUBSWri GPR32sp:$Rn, addsub_shifted_imm32:$imm)>;
@@ -1709,6 +1714,8 @@
           (SUBSWrx GPR32sp:$R2, arith_extended_reg32_i32:$R3)>;
 def : Pat<(sub GPR64sp:$R2, arith_extended_reg32to64_i64:$R3),
           (SUBSXrx GPR64sp:$R2, arith_extended_reg32to64_i64:$R3)>;
+def : Pat<(sub copyFromSP:$R2, (arith_uxtx GPR64:$R3, arith_extendlsl64:$imm)),
+          (SUBXrx64 GPR64sp:$R2, GPR64:$R3, arith_extendlsl64:$imm)>;
 }
 
 // Because of the immediate format for add/sub-imm instructions, the
diff --git a/llvm/test/CodeGen/AArch64/win-alloca.ll b/llvm/test/CodeGen/AArch64/win-alloca.ll
--- a/llvm/test/CodeGen/AArch64/win-alloca.ll
+++ b/llvm/test/CodeGen/AArch64/win-alloca.ll
@@ -17,7 +17,6 @@
 ; CHECK: add [[REG1:x[0-9]+]], x0, #15
 ; CHECK-OPT: lsr x15, [[REG1]], #4
 ; CHECK: bl __chkstk
-; CHECK: mov [[REG2:x[0-9]+]], sp
-; CHECK-OPT: sub [[REG3:x[0-9]+]], [[REG2]], x15, lsl #4
+; CHECK-OPT: sub [[REG3:x[0-9]+]], sp, x15, lsl #4
 ; CHECK-OPT: mov sp, [[REG3]]
 ; CHECK: bl func2
diff --git a/llvm/test/CodeGen/AArch64/win64_vararg.ll b/llvm/test/CodeGen/AArch64/win64_vararg.ll
--- a/llvm/test/CodeGen/AArch64/win64_vararg.ll
+++ b/llvm/test/CodeGen/AArch64/win64_vararg.ll
@@ -197,8 +197,7 @@
 ; CHECK-NEXT: stp x5, x6, [x29, #48]
 ; CHECK-NEXT: str x7, [x29, #64]
 ; CHECK-NEXT: bl __chkstk
-; CHECK-NEXT: mov x8, sp
-; CHECK-NEXT: sub x20, x8, x15, lsl #4
+; CHECK-NEXT: sub x20, sp, x15, lsl #4
 ; CHECK-NEXT: mov sp, x20
 ; CHECK-NEXT: ldr x21, [x29, #16]
 ; CHECK-NEXT: sxtw x22, w0
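For context, a minimal IR sketch of the kind of input that exercises the new pattern, modelled on the existing win-alloca.ll test (the @func and @use names here are illustrative, not taken from the test). With llc targeting aarch64-windows, the dynamically sized alloca is lowered through __chkstk, and the SP adjustment that previously required a separate mov from SP now folds into a single sub from sp with the UXTX-shifted register, as reflected in the updated CHECK lines above:

; Illustrative only: a non-constant alloca size forces a variable SP adjustment,
; which is where the new SUBXrx64 pattern applies.
declare void @use(ptr)

define void @func(i64 %n) {
entry:
  %buf = alloca i8, i64 %n, align 1   ; size only known at run time
  call void @use(ptr %buf)
  ret void
}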