diff --git a/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp b/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp --- a/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp +++ b/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp @@ -303,50 +303,41 @@ continue; } + assert((DestReg != ARM::SP ||BaseReg == ARM::SP) && + "Writing to SP, from other register."); + + // Try to use T1, as it is smaller: sub sp, sp, #imm7 + if ((DestReg == ARM::SP) && (ThisVal < ((1 << 7) - 1) * 4)) { + assert((ThisVal & 3) == 0 && "Stack update is not multiple of 4?"); + Opc = isSub ? ARM::tSUBspi : ARM::tADDspi; + BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg) + .addReg(BaseReg) + .addImm(ThisVal / 4) + .setMIFlags(MIFlags) + .add(predOps(ARMCC::AL)); + break; + } bool HasCCOut = true; - if (BaseReg == ARM::SP) { - // sub sp, sp, #imm7 - if (DestReg == ARM::SP && (ThisVal < ((1 << 7)-1) * 4)) { - assert((ThisVal & 3) == 0 && "Stack update is not multiple of 4?"); - Opc = isSub ? ARM::tSUBspi : ARM::tADDspi; - BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg) - .addReg(BaseReg) - .addImm(ThisVal / 4) - .setMIFlags(MIFlags) - .add(predOps(ARMCC::AL)); - NumBytes = 0; - continue; - } - - // sub rd, sp, so_imm - Opc = isSub ? ARM::t2SUBri : ARM::t2ADDri; - if (ARM_AM::getT2SOImmVal(NumBytes) != -1) { - NumBytes = 0; - } else { - // FIXME: Move this to ARMAddressingModes.h? - unsigned RotAmt = countLeadingZeros(ThisVal); - ThisVal = ThisVal & ARM_AM::rotr32(0xff000000U, RotAmt); - NumBytes &= ~ThisVal; - assert(ARM_AM::getT2SOImmVal(ThisVal) != -1 && - "Bit extraction didn't work?"); - } + int ImmIsT2SO = ARM_AM::getT2SOImmVal(ThisVal); + + Opc = isSub ? ARM::t2SUBri : ARM::t2ADDri; + // Prefer T2: sub rd, rn, so_imm | sub sp, sp, so_imm + if (ImmIsT2SO != -1) { + NumBytes = 0; + } else if (ThisVal < 4096) { + // Prefer T3 if we can make it in a single go: subw rd, rn, imm12 | + // subw sp, sp, imm12 + Opc = isSub ? ARM::t2SUBri12 : ARM::t2ADDri12; + HasCCOut = false; + NumBytes = 0; } else { - assert(DestReg != ARM::SP && BaseReg != ARM::SP); - Opc = isSub ? 
ARM::t2SUBri : ARM::t2ADDri; - if (ARM_AM::getT2SOImmVal(NumBytes) != -1) { - NumBytes = 0; - } else if (ThisVal < 4096) { - Opc = isSub ? ARM::t2SUBri12 : ARM::t2ADDri12; - HasCCOut = false; - NumBytes = 0; - } else { - // FIXME: Move this to ARMAddressingModes.h? - unsigned RotAmt = countLeadingZeros(ThisVal); - ThisVal = ThisVal & ARM_AM::rotr32(0xff000000U, RotAmt); - NumBytes &= ~ThisVal; - assert(ARM_AM::getT2SOImmVal(ThisVal) != -1 && - "Bit extraction didn't work?"); - } + // Use one T2 instruction to reduce NumBytes by the bits kept in ThisVal + // FIXME: Move this to ARMAddressingModes.h? + unsigned RotAmt = countLeadingZeros(ThisVal); + ThisVal = ThisVal & ARM_AM::rotr32(0xff000000U, RotAmt); + NumBytes &= ~ThisVal; + assert(ARM_AM::getT2SOImmVal(ThisVal) != -1 && + "Bit extraction didn't work?"); } // Build the new ADD / SUB. diff --git a/llvm/test/CodeGen/MIR/ARM/thumb2-sub-sp-t3.mir b/llvm/test/CodeGen/MIR/ARM/thumb2-sub-sp-t3.mir new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/MIR/ARM/thumb2-sub-sp-t3.mir @@ -0,0 +1,88 @@ +--- | + ; RUN: llc --run-pass=prologepilog -o - %s | FileCheck %s + ; CHECK: frame-setup CFI_INSTRUCTION def_cfa_register $r7 + ; CHECK-NEXT: $sp = frame-setup t2SUBri12 killed $sp, 4008, 14, $noreg + + target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" + target triple = "thumbv7-none-none-eabi" + define void @foo() #0 { + entry: + %v = alloca [4000 x i8], align 1 + %s = alloca i8*, align 4 + %0 = bitcast [4000 x i8]* %v to i8* + store i8* %0, i8** %s, align 4 + %1 = load i8*, i8** %s, align 4 + call void @bar(i8* %1) + ret void + } + declare void @bar(i8*) #1 + ; Function Attrs: nounwind + declare void @llvm.stackprotector(i8*, i8**) #2 + + attributes #0 = { noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" 
"no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+armv7-a,+d32,+dsp,+fp64,+fpregs,+neon,+strict-align,+thumb-mode,+vfp2,+vfp2sp,+vfp3,+vfp3d16,+vfp3d16sp,+vfp3sp" "unsafe-fp-math"="false" "use-soft-float"="false" } + attributes #1 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+armv7-a,+d32,+dsp,+fp64,+fpregs,+neon,+strict-align,+thumb-mode,+vfp2,+vfp2sp,+vfp3,+vfp3d16,+vfp3d16sp,+vfp3sp" "unsafe-fp-math"="false" "use-soft-float"="false" } + attributes #2 = { nounwind } + + !llvm.module.flags = !{!0, !1} + !llvm.ident = !{!2} + + !0 = !{i32 1, !"wchar_size", i32 4} + !1 = !{i32 1, !"min_enum_size", i32 4} + !2 = !{!"clang version 10.0.0 (git@github.com:llvm/llvm-project.git ee219345881bdf2c144d40731f055e7b36bc8bce)"} + +... 
+--- +name: foo +alignment: 2 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +failedISel: false +tracksRegLiveness: true +hasWinCFI: false +registers: [] +liveins: [] +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 0 + offsetAdjustment: 0 + maxAlignment: 4 + adjustsStack: true + hasCalls: true + stackProtector: '' + maxCallFrameSize: 0 + cvBytesOfCalleeSavedRegisters: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false + localFrameSize: 4004 + savePoint: '' + restorePoint: '' +fixedStack: [] +stack: + - { id: 0, name: v, type: default, offset: 0, size: 4000, alignment: 1, + stack-id: default, callee-saved-register: '', callee-saved-restored: true, + local-offset: -4000, debug-info-variable: '', debug-info-expression: '', + debug-info-location: '' } + - { id: 1, name: s, type: default, offset: 0, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '', callee-saved-restored: true, + local-offset: -4004, debug-info-variable: '', debug-info-expression: '', + debug-info-location: '' } +callSites: [] +constants: [] +machineFunctionInfo: {} +body: | + bb.0.entry: + renamable $r0 = t2ADDri %stack.0.v, 0, 14, $noreg, $noreg + t2STRi12 killed renamable $r0, %stack.1.s, 0, 14, $noreg :: (store 4 into %ir.s) + renamable $r0 = t2LDRi12 %stack.1.s, 0, 14, $noreg :: (dereferenceable load 4 from %ir.s) + ADJCALLSTACKDOWN 0, 0, 14, $noreg, implicit-def dead $sp, implicit $sp + tBL 14, $noreg, @bar, csr_aapcs, implicit-def dead $lr, implicit $sp, implicit $r0, implicit-def $sp + ADJCALLSTACKUP 0, 0, 14, $noreg, implicit-def dead $sp, implicit $sp + tBX_RET 14, $noreg + +... 
diff --git a/llvm/test/CodeGen/Thumb2/large-call.ll b/llvm/test/CodeGen/Thumb2/large-call.ll --- a/llvm/test/CodeGen/Thumb2/large-call.ll +++ b/llvm/test/CodeGen/Thumb2/large-call.ll @@ -9,7 +9,7 @@ ; CHECK: main ; CHECK: vmov.f64 ; Adjust SP for the large call -; CHECK: sub sp, +; CHECK: subw sp, sp, #3720 ; Store to call frame + #8 ; CHECK: vstr{{.*\[}}sp, #8] ; Don't clobber that store until the call. diff --git a/llvm/test/CodeGen/Thumb2/mve-stacksplot.mir b/llvm/test/CodeGen/Thumb2/mve-stacksplot.mir --- a/llvm/test/CodeGen/Thumb2/mve-stacksplot.mir +++ b/llvm/test/CodeGen/Thumb2/mve-stacksplot.mir @@ -118,8 +118,7 @@ ; CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $r6, -28 ; CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $r5, -32 ; CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $r4, -36 - ; CHECK-NEXT: $sp = frame-setup t2SUBri killed $sp, 1216, 14, $noreg, $noreg - ; CHECK-NEXT: $sp = frame-setup tSUBspi $sp, 1, 14, $noreg + ; CHECK-NEXT: $sp = frame-setup t2SUBri12 killed $sp, 1220, 14, $noreg ; CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 1256 ; CHECK-NEXT: $r0 = IMPLICIT_DEF ; CHECK-NEXT: $r1 = IMPLICIT_DEF