diff --git a/llvm/lib/Target/AVR/AVRFrameLowering.cpp b/llvm/lib/Target/AVR/AVRFrameLowering.cpp --- a/llvm/lib/Target/AVR/AVRFrameLowering.cpp +++ b/llvm/lib/Target/AVR/AVRFrameLowering.cpp @@ -281,15 +281,10 @@ } /// Replace pseudo store instructions that pass arguments through the stack with -/// real instructions. If insertPushes is true then all instructions are -/// replaced with push instructions, otherwise regular std instructions are -/// inserted. +/// real instructions that use FP as their base pointer register. static void fixStackStores(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, - const TargetInstrInfo &TII, bool insertPushes) { - const AVRSubtarget &STI = MBB.getParent()->getSubtarget(); - const TargetRegisterInfo &TRI = *STI.getRegisterInfo(); - + const TargetInstrInfo &TII, Register FP) { // Iterate through the BB until we hit a call instruction or we reach the end. for (auto I = MI, E = MBB.end(); I != E && !I->isCall();) { MachineBasicBlock::iterator NextMI = std::next(I); @@ -304,29 +299,6 @@ assert(MI.getOperand(0).getReg() == AVR::SP && "Invalid register, should be SP!"); - if (insertPushes) { - // Replace this instruction with a push. - Register SrcReg = MI.getOperand(2).getReg(); - bool SrcIsKill = MI.getOperand(2).isKill(); - - // We can't use PUSHWRr here because when expanded the order of the new - // instructions are reversed from what we need. Perform the expansion now. - if (Opcode == AVR::STDWSPQRr) { - BuildMI(MBB, I, MI.getDebugLoc(), TII.get(AVR::PUSHRr)) - .addReg(TRI.getSubReg(SrcReg, AVR::sub_hi), - getKillRegState(SrcIsKill)); - BuildMI(MBB, I, MI.getDebugLoc(), TII.get(AVR::PUSHRr)) - .addReg(TRI.getSubReg(SrcReg, AVR::sub_lo), - getKillRegState(SrcIsKill)); - } else { - BuildMI(MBB, I, MI.getDebugLoc(), TII.get(AVR::PUSHRr)) - .addReg(SrcReg, getKillRegState(SrcIsKill)); - } - - MI.eraseFromParent(); - I = NextMI; - continue; - } - // Replace this instruction with a regular store. Use Y as the base - // pointer since it is guaranteed to contain a copy of SP. + // Replace this instruction with a regular store. Use FP as the base + // pointer; callers are expected to pass a register holding a copy of SP. 
@@ -334,7 +306,7 @@ (Opcode == AVR::STDWSPQRr) ? AVR::STDWPtrQRr : AVR::STDPtrQRr; MI.setDesc(TII.get(STOpc)); - MI.getOperand(0).setReg(AVR::R29R28); + MI.getOperand(0).setReg(FP); I = NextMI; } @@ -350,7 +322,7 @@ // function entry. Delete the call frame pseudo and replace all pseudo stores // with real store instructions. if (hasReservedCallFrame(MF)) { - fixStackStores(MBB, MI, TII, false); + fixStackStores(MBB, MI, TII, AVR::R29R28); return MBB.erase(MI); } @@ -358,18 +330,37 @@ unsigned int Opcode = MI->getOpcode(); int Amount = TII.getFrameSize(*MI); - // Adjcallstackup does not need to allocate stack space for the call, instead - // we insert push instructions that will allocate the necessary stack. - // For adjcallstackdown we convert it into an 'adiw reg, ' handling - // the read and write of SP in I/O space. + // ADJCALLSTACKUP and ADJCALLSTACKDOWN are converted to adiw/subi + // instructions to read and write the stack pointer in I/O space. if (Amount != 0) { assert(getStackAlign() == Align(1) && "Unsupported stack alignment"); if (Opcode == TII.getCallFrameSetupOpcode()) { - fixStackStores(MBB, MI, TII, true); + // Update the stack pointer. + // In many cases this can be done far more efficiently by pushing the + // relevant values directly to the stack. However, doing that correctly + // (in the right order, possibly skipping some empty space for undef + // values, etc) is tricky and thus left to be optimized in the future. + BuildMI(MBB, MI, DL, TII.get(AVR::SPREAD), AVR::R31R30).addReg(AVR::SP); + + MachineInstr *New = BuildMI(MBB, MI, DL, TII.get(AVR::SUBIWRdK), AVR::R31R30) + .addReg(AVR::R31R30, RegState::Kill) + .addImm(Amount); + New->getOperand(3).setIsDead(); + + // Z is not killed here: the stores converted below use it as base pointer. + BuildMI(MBB, MI, DL, TII.get(AVR::SPWRITE), AVR::SP).addReg(AVR::R31R30); + + // Make sure the remaining stack stores are converted to real store + // instructions. 
+ fixStackStores(MBB, MI, TII, AVR::R31R30); } else { assert(Opcode == TII.getCallFrameDestroyOpcode()); + // Note that small stack changes could be implemented more efficiently + // with a few pop instructions instead of the 8-9 instructions now + // required. + // Select the best opcode to adjust SP based on the offset size. unsigned addOpcode; if (isUInt<6>(Amount)) { diff --git a/llvm/test/CodeGen/AVR/call.ll b/llvm/test/CodeGen/AVR/call.ll --- a/llvm/test/CodeGen/AVR/call.ll +++ b/llvm/test/CodeGen/AVR/call.ll @@ -32,8 +32,8 @@ ; CHECK-LABEL: calli8_stack: ; CHECK: ldi [[REG1:r[0-9]+]], 10 ; CHECK: ldi [[REG2:r[0-9]+]], 11 -; CHECK: push [[REG2]] -; CHECK: push [[REG1]] +; CHECK: std Z+1, [[REG1]] +; CHECK: std Z+2, [[REG2]] ; CHECK: call foo8_3 %result1 = call i8 @foo8_3(i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11) ret i8 %result1 @@ -54,12 +54,12 @@ ; CHECK-LABEL: calli16_stack: ; CHECK: ldi [[REG1:r[0-9]+]], 9 ; CHECK: ldi [[REG2:r[0-9]+]], 2 -; CHECK: push [[REG2]] -; CHECK: push [[REG1]] +; CHECK: std Z+1, [[REG1]] +; CHECK: std Z+2, [[REG2]] ; CHECK: ldi [[REG1:r[0-9]+]], 10 ; CHECK: ldi [[REG2:r[0-9]+]], 2 -; CHECK: push [[REG2]] -; CHECK: push [[REG1]] +; CHECK: std Z+3, [[REG1]] +; CHECK: std Z+4, [[REG2]] ; CHECK: call foo16_2 %result1 = call i16 @foo16_2(i16 512, i16 513, i16 514, i16 515, i16 516, i16 517, i16 518, i16 519, i16 520, i16 521, i16 522) ret i16 %result1 @@ -84,12 +84,12 @@ ; CHECK-LABEL: calli32_stack: ; CHECK: ldi [[REG1:r[0-9]+]], 64 ; CHECK: ldi [[REG2:r[0-9]+]], 66 -; CHECK: push [[REG2]] -; CHECK: push [[REG1]] +; CHECK: std Z+1, [[REG1]] +; CHECK: std Z+2, [[REG2]] ; CHECK: ldi [[REG1:r[0-9]+]], 15 ; CHECK: ldi [[REG2:r[0-9]+]], 2 -; CHECK: push [[REG2]] -; CHECK: push [[REG1]] +; CHECK: std Z+3, [[REG1]] +; CHECK: std Z+4, [[REG2]] ; CHECK: call foo32_2 %result1 = call i32 @foo32_2(i32 1, i32 2, i32 3, i32 4, i32 34554432) ret i32 %result1 @@ -115,20 +115,20 @@ ; CHECK: ldi [[REG1:r[0-9]+]], 76 ; CHECK: 
ldi [[REG2:r[0-9]+]], 73 -; CHECK: push [[REG2]] -; CHECK: push [[REG1]] +; CHECK: std Z+5, [[REG1]] +; CHECK: std Z+6, [[REG2]] ; CHECK: ldi [[REG1:r[0-9]+]], 31 ; CHECK: ldi [[REG2:r[0-9]+]], 242 -; CHECK: push [[REG2]] -; CHECK: push [[REG1]] +; CHECK: std Z+7, [[REG1]] +; CHECK: std Z+8, [[REG2]] ; CHECK: ldi [[REG1:r[0-9]+]], 155 ; CHECK: ldi [[REG2:r[0-9]+]], 88 -; CHECK: push [[REG2]] -; CHECK: push [[REG1]] +; CHECK: std Z+3, [[REG1]] +; CHECK: std Z+4, [[REG2]] ; CHECK: ldi [[REG1:r[0-9]+]], 255 ; CHECK: ldi [[REG2:r[0-9]+]], 255 -; CHECK: push [[REG2]] -; CHECK: push [[REG1]] +; CHECK: std Z+1, [[REG1]] +; CHECK: std Z+2, [[REG2]] ; CHECK: call foo64_2 %result1 = call i64 @foo64_2(i64 1, i64 2, i64 17446744073709551615) ret i64 %result1 diff --git a/llvm/test/CodeGen/AVR/dynalloca.ll b/llvm/test/CodeGen/AVR/dynalloca.ll --- a/llvm/test/CodeGen/AVR/dynalloca.ll +++ b/llvm/test/CodeGen/AVR/dynalloca.ll @@ -53,9 +53,27 @@ ; CHECK-LABEL: dynalloca2: ; CHECK: in [[SPCOPY1:r[0-9]+]], 61 ; CHECK: in [[SPCOPY2:r[0-9]+]], 62 -; CHECK: push -; CHECK-NOT: st -; CHECK-NOT: std +; Allocate stack space for call +; CHECK: in {{.*}}, 61 +; CHECK: in {{.*}}, 62 +; CHECK: subi +; CHECK: sbci +; CHECK: in r0, 63 +; CHECK-NEXT: cli +; CHECK-NEXT: out 62, {{.*}} +; CHECK-NEXT: out 63, r0 +; CHECK-NEXT: out 61, {{.*}} +; Store values on the stack +; CHECK: ldi r16, 0 +; CHECK: ldi r17, 0 +; CHECK: std Z+5, r16 +; CHECK: std Z+6, r17 +; CHECK: std Z+7, r16 +; CHECK: std Z+8, r17 +; CHECK: std Z+3, r16 +; CHECK: std Z+4, r17 +; CHECK: std Z+1, r16 +; CHECK: std Z+2, r17 ; CHECK: call ; Call frame restore ; CHECK-NEXT: in r30, 61 diff --git a/llvm/test/CodeGen/AVR/varargs.ll b/llvm/test/CodeGen/AVR/varargs.ll --- a/llvm/test/CodeGen/AVR/varargs.ll +++ b/llvm/test/CodeGen/AVR/varargs.ll @@ -42,16 +42,16 @@ ; CHECK-LABEL: varargcall: ; CHECK: ldi [[REG1:r[0-9]+]], 189 ; CHECK: ldi [[REG2:r[0-9]+]], 205 -; CHECK: push [[REG2]] -; CHECK: push [[REG1]] +; CHECK: std Z+3, [[REG1]] +; 
CHECK: std Z+4, [[REG2]] ; CHECK: ldi [[REG1:r[0-9]+]], 191 ; CHECK: ldi [[REG2:r[0-9]+]], 223 -; CHECK: push [[REG2]] -; CHECK: push [[REG1]] +; CHECK: std Z+5, [[REG1]] +; CHECK: std Z+6, [[REG2]] ; CHECK: ldi [[REG1:r[0-9]+]], 205 ; CHECK: ldi [[REG2:r[0-9]+]], 171 -; CHECK: push [[REG2]] -; CHECK: push [[REG1]] +; CHECK: std Z+1, [[REG1]] +; CHECK: std Z+2, [[REG2]] ; CHECK: call ; CHECK: adiw r30, 6 tail call void (i16, ...) @var1223(i16 -21555, i16 -12867, i16 -8257)