diff --git a/llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp b/llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp --- a/llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp +++ b/llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp @@ -635,30 +635,42 @@ template <> bool AVRExpandPseudo::expand(Block &MBB, BlockIt MBBI) { MachineInstr &MI = *MBBI; - Register DstLoReg, DstHiReg; Register DstReg = MI.getOperand(0).getReg(); Register SrcReg = MI.getOperand(1).getReg(); + bool DstIsKill = MI.getOperand(0).isKill(); bool SrcIsKill = MI.getOperand(1).isKill(); - unsigned OpLo = AVR::LDRdPtr; - unsigned OpHi = AVR::LDDRdPtrQ; - TRI->splitReg(DstReg, DstLoReg, DstHiReg); + const AVRSubtarget &STI = MBB.getParent()->getSubtarget(); // DstReg has an earlyclobber so the register allocator will allocate them in // separate registers. assert(DstReg != SrcReg && "Dst and Src registers are the same!"); - // Load low byte. - buildMI(MBB, MBBI, OpLo) - .addReg(DstLoReg, RegState::Define) - .addReg(SrcReg) - .setMemRefs(MI.memoperands()); + if (STI.hasTinyEncoding()) { + // Handle this case in the expansion of LDDWRdPtrQ because it is very + // similar. + buildMI(MBB, MBBI, AVR::LDDWRdPtrQ) + .addDef(DstReg, getKillRegState(DstIsKill)) + .addReg(SrcReg, getKillRegState(SrcIsKill)) + .addImm(0) + .setMemRefs(MI.memoperands()); - // Load high byte. - buildMI(MBB, MBBI, OpHi) - .addReg(DstHiReg, RegState::Define) - .addReg(SrcReg, getKillRegState(SrcIsKill)) - .addImm(1) - .setMemRefs(MI.memoperands()); + } else { + Register DstLoReg, DstHiReg; + TRI->splitReg(DstReg, DstLoReg, DstHiReg); + + // Load low byte. + buildMI(MBB, MBBI, AVR::LDRdPtr) + .addReg(DstLoReg, RegState::Define) + .addReg(SrcReg) + .setMemRefs(MI.memoperands()); + + // Load high byte. 
+ buildMI(MBB, MBBI, AVR::LDDRdPtrQ) + .addReg(DstHiReg, RegState::Define) + .addReg(SrcReg, getKillRegState(SrcIsKill)) + .addImm(1) + .setMemRefs(MI.memoperands()); + } MI.eraseFromParent(); return true; @@ -733,14 +745,12 @@ template <> bool AVRExpandPseudo::expand(Block &MBB, BlockIt MBBI) { MachineInstr &MI = *MBBI; - Register DstLoReg, DstHiReg; Register DstReg = MI.getOperand(0).getReg(); Register SrcReg = MI.getOperand(1).getReg(); unsigned Imm = MI.getOperand(2).getImm(); + bool DstIsKill = MI.getOperand(0).isKill(); bool SrcIsKill = MI.getOperand(1).isKill(); - unsigned OpLo = AVR::LDDRdPtrQ; - unsigned OpHi = AVR::LDDRdPtrQ; - TRI->splitReg(DstReg, DstLoReg, DstHiReg); + const AVRSubtarget &STI = MBB.getParent()->getSubtarget(); // Since we add 1 to the Imm value for the high byte below, and 63 is the // highest Imm value allowed for the instruction, 62 is the limit here. @@ -750,19 +760,51 @@ // separate registers. assert(DstReg != SrcReg && "Dst and Src registers are the same!"); - // Load low byte. - buildMI(MBB, MBBI, OpLo) - .addReg(DstLoReg, RegState::Define) - .addReg(SrcReg) - .addImm(Imm) - .setMemRefs(MI.memoperands()); + if (STI.hasTinyEncoding()) { + // Reduced tiny cores don't support load/store with displacement. However, + // they do support postincrement. So we'll simply adjust the pointer before + // and after and use postincrement to load multiple registers. + + // Add offset. The offset can be 0 when expanding this instruction from the + // more specific LDWRdPtr instruction. + if (Imm != 0) { + buildMI(MBB, MBBI, AVR::SUBIWRdK, SrcReg) + .addReg(SrcReg) + .addImm(0x10000 - Imm); + } - // Load high byte. - buildMI(MBB, MBBI, OpHi) - .addReg(DstHiReg, RegState::Define) - .addReg(SrcReg, getKillRegState(SrcIsKill)) - .addImm(Imm + 1) - .setMemRefs(MI.memoperands()); + // Do a word load with postincrement. This will be lowered to a two byte + // load. 
+ buildMI(MBB, MBBI, AVR::LDWRdPtrPi) + .addDef(DstReg, getKillRegState(DstIsKill)) + .addReg(SrcReg, getKillRegState(SrcIsKill)) + .addImm(0) + .setMemRefs(MI.memoperands()); + + // If the pointer is used after the load instruction, subtract the new + // offset (with 2 added after the postincrement instructions) so it is the + // same as before. + if (!SrcIsKill) { + buildMI(MBB, MBBI, AVR::SUBIWRdK, SrcReg).addReg(SrcReg).addImm(Imm + 2); + } + } else { + Register DstLoReg, DstHiReg; + TRI->splitReg(DstReg, DstLoReg, DstHiReg); + + // Load low byte. + buildMI(MBB, MBBI, AVR::LDDRdPtrQ) + .addReg(DstLoReg, RegState::Define) + .addReg(SrcReg) + .addImm(Imm) + .setMemRefs(MI.memoperands()); + + // Load high byte. + buildMI(MBB, MBBI, AVR::LDDRdPtrQ) + .addReg(DstHiReg, RegState::Define) + .addReg(SrcReg, getKillRegState(SrcIsKill)) + .addImm(Imm + 1) + .setMemRefs(MI.memoperands()); + } MI.eraseFromParent(); return true; @@ -972,27 +1014,39 @@ template <> bool AVRExpandPseudo::expand(Block &MBB, BlockIt MBBI) { MachineInstr &MI = *MBBI; - Register SrcLoReg, SrcHiReg; Register DstReg = MI.getOperand(0).getReg(); Register SrcReg = MI.getOperand(1).getReg(); + bool DstIsKill = MI.getOperand(0).isKill(); bool DstIsUndef = MI.getOperand(0).isUndef(); bool SrcIsKill = MI.getOperand(1).isKill(); - unsigned OpLo = AVR::STPtrRr; - unsigned OpHi = AVR::STDPtrQRr; - TRI->splitReg(SrcReg, SrcLoReg, SrcHiReg); + const AVRSubtarget &STI = MBB.getParent()->getSubtarget(); //: TODO: need to reverse this order like inw and stsw? - auto MIBLO = buildMI(MBB, MBBI, OpLo) - .addReg(DstReg, getUndefRegState(DstIsUndef)) - .addReg(SrcLoReg, getKillRegState(SrcIsKill)); - auto MIBHI = buildMI(MBB, MBBI, OpHi) - .addReg(DstReg, getUndefRegState(DstIsUndef)) - .addImm(1) - .addReg(SrcHiReg, getKillRegState(SrcIsKill)); + if (STI.hasTinyEncoding()) { + // Handle this case in the expansion of STDWPtrQRr because it is very + // similar. 
+ buildMI(MBB, MBBI, AVR::STDWPtrQRr) + .addReg(DstReg, + getKillRegState(DstIsKill) | getUndefRegState(DstIsUndef)) + .addImm(0) + .addReg(SrcReg, getKillRegState(SrcIsKill)) + .setMemRefs(MI.memoperands()); - MIBLO.setMemRefs(MI.memoperands()); - MIBHI.setMemRefs(MI.memoperands()); + } else { + Register SrcLoReg, SrcHiReg; + TRI->splitReg(SrcReg, SrcLoReg, SrcHiReg); + buildMI(MBB, MBBI, AVR::STPtrRr) + .addReg(DstReg, getUndefRegState(DstIsUndef)) + .addReg(SrcLoReg, getKillRegState(SrcIsKill)) + .setMemRefs(MI.memoperands()); + + buildMI(MBB, MBBI, AVR::STDPtrQRr) + .addReg(DstReg, getUndefRegState(DstIsUndef)) + .addImm(1) + .addReg(SrcHiReg, getKillRegState(SrcIsKill)) + .setMemRefs(MI.memoperands()); + } MI.eraseFromParent(); return true; @@ -1071,6 +1125,7 @@ template <> bool AVRExpandPseudo::expand(Block &MBB, BlockIt MBBI) { MachineInstr &MI = *MBBI; + const AVRSubtarget &STI = MBB.getParent()->getSubtarget(); Register DstReg = MI.getOperand(0).getReg(); bool DstIsKill = MI.getOperand(0).isKill(); @@ -1079,23 +1134,32 @@ bool SrcIsKill = MI.getOperand(2).isKill(); // STD's maximum displacement is 63, so larger stores have to be split into a - // set of operations - if (Imm >= 63) { - if (!DstIsKill) { - buildMI(MBB, MBBI, AVR::PUSHWRr).addReg(DstReg); + // set of operations. + // For avrtiny chips, STD is not available at all so we always have to fall + // back to manual pointer adjustments. + if (Imm >= 63 || STI.hasTinyEncoding()) { + // Add offset. The offset can be 0 when expanding this instruction from the + // more specific STWPtrRr instruction. + if (Imm != 0) { + buildMI(MBB, MBBI, AVR::SUBIWRdK, DstReg) + .addReg(DstReg, RegState::Kill) + .addImm(0x10000 - Imm); } - buildMI(MBB, MBBI, AVR::SUBIWRdK) - .addReg(DstReg, RegState::Define) - .addReg(DstReg, RegState::Kill) - .addImm(-Imm); - - buildMI(MBB, MBBI, AVR::STWPtrRr) - .addReg(DstReg, RegState::Kill) - .addReg(SrcReg, getKillRegState(SrcIsKill)); + // Do the store. 
This is a word store, that will be expanded further. + buildMI(MBB, MBBI, AVR::STWPtrPiRr, DstReg) + .addReg(DstReg, getKillRegState(DstIsKill)) + .addReg(SrcReg, getKillRegState(SrcIsKill)) + .addImm(0) + .setMemRefs(MI.memoperands()); + // If the pointer is used after the store instruction, subtract the new + // offset (with 2 added after the postincrement instructions) so it is the + // same as before. if (!DstIsKill) { - buildMI(MBB, MBBI, AVR::POPWRd).addDef(DstReg, RegState::Define); + buildMI(MBB, MBBI, AVR::SUBIWRdK, DstReg) + .addReg(DstReg, RegState::Kill) + .addImm(Imm + 2); } } else { unsigned OpLo = AVR::STDPtrQRr; diff --git a/llvm/lib/Target/AVR/AVRFrameLowering.cpp b/llvm/lib/Target/AVR/AVRFrameLowering.cpp --- a/llvm/lib/Target/AVR/AVRFrameLowering.cpp +++ b/llvm/lib/Target/AVR/AVRFrameLowering.cpp @@ -121,7 +121,8 @@ } // Reserve the necessary frame memory by doing FP -= . - unsigned Opcode = (isUInt<6>(FrameSize)) ? AVR::SBIWRdK : AVR::SUBIWRdK; + unsigned Opcode = (isUInt<6>(FrameSize) && STI.hasADDSUBIW()) ? AVR::SBIWRdK + : AVR::SUBIWRdK; MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(Opcode), AVR::R29R28) .addReg(AVR::R29R28, RegState::Kill) @@ -202,7 +203,7 @@ unsigned Opcode; // Select the optimal opcode depending on how big it is. - if (isUInt<6>(FrameSize)) { + if (isUInt<6>(FrameSize) && STI.hasADDSUBIW()) { Opcode = AVR::ADIWRdK; } else { Opcode = AVR::SUBIWRdK; @@ -384,7 +385,7 @@ // Select the best opcode to adjust SP based on the offset size. 
unsigned AddOpcode; - if (isUInt<6>(Amount)) { + if (isUInt<6>(Amount) && STI.hasADDSUBIW()) { AddOpcode = AVR::ADIWRdK; } else { AddOpcode = AVR::SUBIWRdK; @@ -457,7 +458,8 @@ int Opcode = MI.getOpcode(); if ((Opcode != AVR::LDDRdPtrQ) && (Opcode != AVR::LDDWRdPtrQ) && - (Opcode != AVR::STDPtrQRr) && (Opcode != AVR::STDWPtrQRr)) { + (Opcode != AVR::STDPtrQRr) && (Opcode != AVR::STDWPtrQRr) && + (Opcode != AVR::FRMIDX)) { continue; } diff --git a/llvm/lib/Target/AVR/AVRInstrInfo.td b/llvm/lib/Target/AVR/AVRInstrInfo.td --- a/llvm/lib/Target/AVR/AVRInstrInfo.td +++ b/llvm/lib/Target/AVR/AVRInstrInfo.td @@ -320,6 +320,9 @@ def HasTinyEncoding : Predicate<"Subtarget->hasTinyEncoding()">, AssemblerPredicate<(all_of FeatureTinyEncoding)>; +def HasNonTinyEncoding : Predicate<"!Subtarget->hasTinyEncoding()">, + AssemblerPredicate<(all_of (not FeatureTinyEncoding))>; + // AVR specific condition code. These correspond to AVR_*_COND in // AVRInstrInfo.td. They must be kept in synch. def AVR_COND_EQ : PatLeaf<(i8 0)>; @@ -1283,7 +1286,7 @@ "lds\t$rd, $k", [(set i8 : $rd, (load imm : $k))]>, - Requires<[HasSRAM]>; + Requires<[HasSRAM, HasNonTinyEncoding]>; // LDSW Rd+1:Rd, K+1:K // @@ -1297,7 +1300,7 @@ "ldsw\t$dst, $src", [(set i16 : $dst, (load imm : $src))]>, - Requires<[HasSRAM]>; + Requires<[HasSRAM, HasNonTinyEncoding]>; } // Indirect loads. 
@@ -1315,8 +1318,12 @@ // LDW Rd+1:Rd, P // // Expands to: - // ld Rd, P - // ldd Rd+1, P+1 + // ld Rd, P + // ldd Rd+1, P+1 + // On reduced tiny cores, this instruction expands to: + // ld Rd, P+ + // ld Rd+1, P+ + // subiw P, 2 let Constraints = "@earlyclobber $reg" in def LDWRdPtr : Pseudo<(outs DREGS : $reg), @@ -1386,13 +1393,18 @@ "ldd\t$reg, $memri", [(set i8 : $reg, (load addr : $memri))]>, - Requires<[HasSRAM]>; + Requires<[HasSRAM, HasNonTinyEncoding]>; // LDDW Rd+1:Rd, P+q // // Expands to: - // ldd Rd, P+q - // ldd Rd+1, P+q+1 + // ldd Rd, P+q + // ldd Rd+1, P+q+1 + // On reduced tiny cores, this instruction expands to: + // subiw P, -q + // ld Rd, P+ + // ld Rd+1, P+ + // subiw P, q+2 let Constraints = "@earlyclobber $dst" in def LDDWRdPtrQ : Pseudo<(outs DREGS : $dst), @@ -1492,7 +1504,7 @@ "sts\t$k, $rd", [(store i8 : $rd, imm : $k)]>, - Requires<[HasSRAM]>; + Requires<[HasSRAM, HasNonTinyEncoding]>; // STSW K+1:K, Rr+1:Rr // @@ -1506,7 +1518,7 @@ "stsw\t$dst, $src", [(store i16 : $src, imm : $dst)]>, - Requires<[HasSRAM]>; + Requires<[HasSRAM, HasNonTinyEncoding]>; // Indirect stores. // ST P, Rr @@ -1524,8 +1536,12 @@ // Stores the value of Rr into the location addressed by pointer P. // // Expands to: -// st P, Rr -// std P+1, Rr+1 +// st P, Rr +// std P+1, Rr+1 +// On reduced tiny cores, this instruction expands to: +// st P+, Rr +// st P+, Rr+1 +// subiw P, 2 def STWPtrRr : Pseudo<(outs), (ins PTRDISPREGS : $ptrreg, DREGS @@ -1624,15 +1640,20 @@ "std\t$memri, $reg", [(store i8 : $reg, addr : $memri)]>, - Requires<[HasSRAM]>; + Requires<[HasSRAM, HasNonTinyEncoding]>; // STDW P+q, Rr+1:Rr // Stores the value of Rr into the location addressed by pointer P with a // displacement of q. Does not modify P. 
// // Expands to: -// std P+q, Rr -// std P+q+1, Rr+1 +// std P+q, Rr +// std P+q+1, Rr+1 +// On reduced tiny cores, this instruction expands to: +// subiw P, -q +// st P+, Rr +// st P+, Rr+1 +// subiw P, q+2 def STDWPtrQRr : Pseudo<(outs), (ins memri : $memri, DREGS @@ -2413,7 +2434,8 @@ : $src2), (SBIWRdK i16 : $src1, (imm0_63_neg - : $src2))>; + : $src2))>, + Requires<[HasADDSUBIW]>; def : Pat<(add i16 : $src1, imm : $src2), @@ -2481,26 +2503,18 @@ (SUBIWRdK i16 : $src, tglobaladdr : $src2)>; -def : Pat<(i8(load(AVRWrapper tglobaladdr - : $dst))), - (LDSRdK tglobaladdr - : $dst)>; -def : Pat<(i16(load(AVRWrapper tglobaladdr - : $dst))), - (LDSWRdK tglobaladdr - : $dst)>; -def : Pat<(store i8 - : $src, (i16(AVRWrapper tglobaladdr - : $dst))), - (STSKRr tglobaladdr - : $dst, i8 - : $src)>; -def : Pat<(store i16 - : $src, (i16(AVRWrapper tglobaladdr - : $dst))), - (STSWKRr tglobaladdr - : $dst, i16 - : $src)>; +def : Pat<(i8(load(AVRWrapper tglobaladdr:$dst))), + (LDSRdK tglobaladdr:$dst)>, + Requires<[HasSRAM, HasNonTinyEncoding]>; +def : Pat<(i16(load(AVRWrapper tglobaladdr:$dst))), + (LDSWRdK tglobaladdr:$dst)>, + Requires<[HasSRAM, HasNonTinyEncoding]>; +def : Pat<(store i8:$src, (i16(AVRWrapper tglobaladdr:$dst))), + (STSKRr tglobaladdr:$dst, i8:$src)>, + Requires<[HasSRAM, HasNonTinyEncoding]>; +def : Pat<(store i16:$src, (i16(AVRWrapper tglobaladdr:$dst))), + (STSWKRr tglobaladdr:$dst, i16:$src)>, + Requires<[HasSRAM, HasNonTinyEncoding]>; // BlockAddress def : Pat<(i16(AVRWrapper tblockaddress diff --git a/llvm/lib/Target/AVR/AVRRegisterInfo.cpp b/llvm/lib/Target/AVR/AVRRegisterInfo.cpp --- a/llvm/lib/Target/AVR/AVRRegisterInfo.cpp +++ b/llvm/lib/Target/AVR/AVRRegisterInfo.cpp @@ -166,18 +166,28 @@ // instruction. We have only two-address instructions, thus we need to // expand it into move + add. 
if (MI.getOpcode() == AVR::FRMIDX) { - MI.setDesc(TII.get(AVR::MOVWRdRr)); - MI.getOperand(FIOperandNum).ChangeToRegister(AVR::R29R28, false); - MI.removeOperand(2); + Register DstReg = MI.getOperand(0).getReg(); + assert(DstReg != AVR::R29R28 && "Dest reg cannot be the frame pointer"); + + // Copy the frame pointer. + if (STI.hasMOVW()) { + BuildMI(MBB, MI, dl, TII.get(AVR::MOVWRdRr), DstReg) + .addReg(AVR::R29R28); + } else { + Register DstLoReg, DstHiReg; + splitReg(DstReg, DstLoReg, DstHiReg); + BuildMI(MBB, MI, dl, TII.get(AVR::MOVRdRr), DstLoReg) + .addReg(AVR::R28); + BuildMI(MBB, MI, dl, TII.get(AVR::MOVRdRr), DstHiReg) + .addReg(AVR::R29); + } assert(Offset > 0 && "Invalid offset"); // We need to materialize the offset via an add instruction. unsigned Opcode; - Register DstReg = MI.getOperand(0).getReg(); - assert(DstReg != AVR::R29R28 && "Dest reg cannot be the frame pointer"); - II++; // Skip over the FRMIDX (and now MOVW) instruction. + II++; // Skip over the FRMIDX instruction. // Generally, to load a frame address two add instructions are emitted that // could get folded into a single one: @@ -195,7 +205,7 @@ case AVR::R25R24: case AVR::R27R26: case AVR::R31R30: { - if (isUInt<6>(Offset)) { + if (isUInt<6>(Offset) && STI.hasADDSUBIW()) { Opcode = AVR::ADIWRdK; break; } @@ -214,19 +224,28 @@ .addImm(Offset); New->getOperand(3).setIsDead(); + MI.eraseFromParent(); // remove FRMIDX + return false; } + // On most AVRs, we can use an offset up to 62 for load/store with + // displacement (63 for byte values, 62 for word values). However, the + // "reduced tiny" cores don't support load/store with displacement. So for + // them, we force an offset of 0 meaning that any positive offset will require + // adjusting the frame pointer. + int MaxOffset = STI.hasTinyEncoding() ? 0 : 62; + // If the offset is too big we have to adjust and restore the frame pointer // to materialize a valid load/store with displacement. 
//: TODO: consider using only one adiw/sbiw chain for more than one frame //: index - if (Offset > 62) { + if (Offset > MaxOffset) { unsigned AddOpc = AVR::ADIWRdK, SubOpc = AVR::SBIWRdK; - int AddOffset = Offset - 63 + 1; + int AddOffset = Offset - MaxOffset; // For huge offsets where adiw/sbiw cannot be used use a pair of subi/sbci. - if ((Offset - 63 + 1) > 63) { + if ((Offset - MaxOffset) > 63 || !STI.hasADDSUBIW()) { AddOpc = AVR::SUBIWRdK; SubOpc = AVR::SUBIWRdK; AddOffset = -AddOffset; @@ -253,9 +272,9 @@ // cond branch it will be using a dead register. BuildMI(MBB, std::next(II), dl, TII.get(SubOpc), AVR::R29R28) .addReg(AVR::R29R28, RegState::Kill) - .addImm(Offset - 63 + 1); + .addImm(Offset - MaxOffset); - Offset = 62; + Offset = MaxOffset; } MI.getOperand(FIOperandNum).ChangeToRegister(AVR::R29R28, false); diff --git a/llvm/test/CodeGen/AVR/calling-conv/c/tiny.ll b/llvm/test/CodeGen/AVR/calling-conv/c/tiny.ll --- a/llvm/test/CodeGen/AVR/calling-conv/c/tiny.ll +++ b/llvm/test/CodeGen/AVR/calling-conv/c/tiny.ll @@ -40,8 +40,6 @@ ; NOTE: %a(i16), %b(i16) and %c(i16) each costs two registers, ; while %d(i16) is passed via the stack. -; FIXME: The `ldd` instruction is invalid on avrtiny, this test just shows -; how arguments are passed. define i16 @foo3(i16 %a, i16 %b, i16 %c, i16 %d) { ; CHECK-LABEL: foo3: ; CHECK: ; %bb.0: @@ -49,8 +47,16 @@ ; CHECK-NEXT: push r29 ; CHECK-NEXT: in r28, 61 ; CHECK-NEXT: in r29, 62 -; CHECK-NEXT: ldd r30, Y+5 -; CHECK-NEXT: ldd r31, Y+6 +; CHECK-NEXT: in r16, 63 +; CHECK-NEXT: subi r28, 251 +; CHECK-NEXT: sbci r29, 255 +; CHECK-NEXT: ld r30, Y+ +; CHECK-NEXT: ld r31, Y+ +; CHECK-NEXT: subi r28, 2 +; CHECK-NEXT: sbci r29, 0 +; CHECK-NEXT: subi r28, 5 +; CHECK-NEXT: sbci r29, 0 +; CHECK-NEXT: out 63, r16 ; CHECK-NEXT: sub r20, r30 ; CHECK-NEXT: sbc r21, r31 ; CHECK-NEXT: sub r24, r22 @@ -67,8 +73,6 @@ } ; NOTE: %a(i32) costs four registers, while %b(i32) is passed via the stack. 
-; FIXME: The `ldd` instruction is invalid on avrtiny, this test just shows -; how arguments are passed. define i32 @foo4(i32 %a, i32 %b) { ; CHECK-LABEL: foo4: ; CHECK: ; %bb.0: @@ -76,10 +80,26 @@ ; CHECK-NEXT: push r29 ; CHECK-NEXT: in r28, 61 ; CHECK-NEXT: in r29, 62 -; CHECK-NEXT: ldd r20, Y+5 -; CHECK-NEXT: ldd r21, Y+6 -; CHECK-NEXT: ldd r30, Y+7 -; CHECK-NEXT: ldd r31, Y+8 +; CHECK-NEXT: in r16, 63 +; CHECK-NEXT: subi r28, 251 +; CHECK-NEXT: sbci r29, 255 +; CHECK-NEXT: ld r20, Y+ +; CHECK-NEXT: ld r21, Y+ +; CHECK-NEXT: subi r28, 2 +; CHECK-NEXT: sbci r29, 0 +; CHECK-NEXT: subi r28, 5 +; CHECK-NEXT: sbci r29, 0 +; CHECK-NEXT: out 63, r16 +; CHECK-NEXT: in r16, 63 +; CHECK-NEXT: subi r28, 249 +; CHECK-NEXT: sbci r29, 255 +; CHECK-NEXT: ld r30, Y+ +; CHECK-NEXT: ld r31, Y+ +; CHECK-NEXT: subi r28, 2 +; CHECK-NEXT: sbci r29, 0 +; CHECK-NEXT: subi r28, 7 +; CHECK-NEXT: sbci r29, 0 +; CHECK-NEXT: out 63, r16 ; CHECK-NEXT: sub r20, r22 ; CHECK-NEXT: sbc r21, r23 ; CHECK-NEXT: sbc r30, r24 @@ -96,8 +116,6 @@ } ; NOTE: %0 costs six registers, while %1 is passed via the stack. -; FIXME: The `ldd` instruction is invalid on avrtiny, this test just shows -; how arguments are passed. define i8 @foo5([5 x i8] %0, i8 %1) { ; CHECK-LABEL: foo5: ; CHECK: ; %bb.0: @@ -105,7 +123,11 @@ ; CHECK-NEXT: push r29 ; CHECK-NEXT: in r28, 61 ; CHECK-NEXT: in r29, 62 -; CHECK-NEXT: ldd r24, Y+5 +; CHECK-NEXT: mov r26, r28 +; CHECK-NEXT: mov r27, r29 +; CHECK-NEXT: subi r26, 251 +; CHECK-NEXT: sbci r27, 255 +; CHECK-NEXT: ld r24, X ; CHECK-NEXT: add r24, r20 ; CHECK-NEXT: pop r29 ; CHECK-NEXT: pop r28 @@ -129,8 +151,6 @@ ; NOTE: %0 cost four registers, while %1 is passed via the stack, ; though there are two vacant registers. -; FIXME: The `ldd` instruction is invalid on avrtiny, this test just shows -; how arguments are passed. 
define i8 @foo7([3 x i8] %0, [3 x i8] %1) { ; CHECK-LABEL: foo7: ; CHECK: ; %bb.0: @@ -138,7 +158,11 @@ ; CHECK-NEXT: push r29 ; CHECK-NEXT: in r28, 61 ; CHECK-NEXT: in r29, 62 -; CHECK-NEXT: ldd r24, Y+5 +; CHECK-NEXT: mov r26, r28 +; CHECK-NEXT: mov r27, r29 +; CHECK-NEXT: subi r26, 251 +; CHECK-NEXT: sbci r27, 255 +; CHECK-NEXT: ld r24, X ; CHECK-NEXT: add r24, r22 ; CHECK-NEXT: pop r29 ; CHECK-NEXT: pop r28 @@ -151,8 +175,6 @@ ; NOTE: %0 costs four registers, and %1 costs two registers, while %2 is ; passed via the stack, though there is one vacant register. -; FIXME: The `ldd` instruction is invalid on avrtiny, this test just shows -; how arguments are passed. define i8 @foo8([3 x i8] %0, i8 %1, i8 %2) { ; CHECK-LABEL: foo8: ; CHECK: ; %bb.0: @@ -161,7 +183,11 @@ ; CHECK-NEXT: in r28, 61 ; CHECK-NEXT: in r29, 62 ; CHECK-NEXT: add r22, r20 -; CHECK-NEXT: ldd r24, Y+5 +; CHECK-NEXT: mov r26, r28 +; CHECK-NEXT: mov r27, r29 +; CHECK-NEXT: subi r26, 251 +; CHECK-NEXT: sbci r27, 255 +; CHECK-NEXT: ld r24, X ; CHECK-NEXT: sub r24, r22 ; CHECK-NEXT: pop r29 ; CHECK-NEXT: pop r28 @@ -173,8 +199,6 @@ } ; NOTE: %0 is passed via registers, though there are 6 vacant registers. -; FIXME: The `ldd` instruction is invalid on avrtiny, this test just shows -; how arguments are passed. define i8 @foo9([7 x i8] %0) { ; CHECK-LABEL: foo9: ; CHECK: ; %bb.0: @@ -182,8 +206,16 @@ ; CHECK-NEXT: push r29 ; CHECK-NEXT: in r28, 61 ; CHECK-NEXT: in r29, 62 -; CHECK-NEXT: ldd r25, Y+6 -; CHECK-NEXT: ldd r24, Y+5 +; CHECK-NEXT: mov r26, r28 +; CHECK-NEXT: mov r27, r29 +; CHECK-NEXT: subi r26, 250 +; CHECK-NEXT: sbci r27, 255 +; CHECK-NEXT: ld r25, X +; CHECK-NEXT: mov r26, r28 +; CHECK-NEXT: mov r27, r29 +; CHECK-NEXT: subi r26, 251 +; CHECK-NEXT: sbci r27, 255 +; CHECK-NEXT: ld r24, X ; CHECK-NEXT: add r24, r25 ; CHECK-NEXT: pop r29 ; CHECK-NEXT: pop r28 @@ -195,8 +227,6 @@ } ; NOTE: %0 costs six registers, while %1 and %2 are passed via the stack. 
-; FIXME: The `ldd` instruction is invalid on avrtiny, this test just shows -; how arguments are passed. define i8 @fooa([6 x i8] %0, i8 %1, i8 %2) { ; CHECK-LABEL: fooa: ; CHECK: ; %bb.0: @@ -204,8 +234,16 @@ ; CHECK-NEXT: push r29 ; CHECK-NEXT: in r28, 61 ; CHECK-NEXT: in r29, 62 -; CHECK-NEXT: ldd r25, Y+5 -; CHECK-NEXT: ldd r24, Y+6 +; CHECK-NEXT: mov r26, r28 +; CHECK-NEXT: mov r27, r29 +; CHECK-NEXT: subi r26, 251 +; CHECK-NEXT: sbci r27, 255 +; CHECK-NEXT: ld r25, X +; CHECK-NEXT: mov r26, r28 +; CHECK-NEXT: mov r27, r29 +; CHECK-NEXT: subi r26, 250 +; CHECK-NEXT: sbci r27, 255 +; CHECK-NEXT: ld r24, X ; CHECK-NEXT: sub r24, r25 ; CHECK-NEXT: sub r24, r20 ; CHECK-NEXT: pop r29 diff --git a/llvm/test/CodeGen/AVR/directmem.ll b/llvm/test/CodeGen/AVR/directmem.ll --- a/llvm/test/CodeGen/AVR/directmem.ll +++ b/llvm/test/CodeGen/AVR/directmem.ll @@ -1,4 +1,5 @@ ; RUN: llc -mattr=sram,addsubiw < %s -march=avr | FileCheck %s +; RUN: llc -mattr=sram,avrtiny < %s -march=avr | FileCheck %s --check-prefix=CHECK-TINY @char = common global i8 0 @char.array = common global [3 x i8] zeroinitializer @@ -20,6 +21,11 @@ ; CHECK-LABEL: global8_store: ; CHECK: ldi [[REG:r[0-9]+]], 6 ; CHECK: sts char, [[REG]] +; CHECK-TINY-LABEL: global8_store: +; CHECK-TINY: ldi [[REG1:r[0-9]+]], 6 +; CHECK-TINY: ldi [[REG2:r[0-9]+]], lo8(char) +; CHECK-TINY: ldi [[REG3:r[0-9]+]], hi8(char) +; CHECK-TINY: st [[REG4:[X-Z]]], [[REG1]] store i8 6, i8* @char ret void } @@ -27,6 +33,10 @@ define i8 @global8_load() { ; CHECK-LABEL: global8_load: ; CHECK: lds r24, char +; CHECK-TINY-LABEL: global8_load: +; CHECK-TINY: ldi [[REG1:r[0-9]+]], lo8(char) +; CHECK-TINY: ldi [[REG2:r[0-9]+]], hi8(char) +; CHECK-TINY: ld [[REG3:r[0-9]+]], [[REG4:[X-Z]]] %result = load i8, i8* @char ret i8 %result } diff --git a/llvm/test/CodeGen/AVR/pseudo/LDDWRdPtrQ.mir b/llvm/test/CodeGen/AVR/pseudo/LDDWRdPtrQ.mir --- a/llvm/test/CodeGen/AVR/pseudo/LDDWRdPtrQ.mir +++ b/llvm/test/CodeGen/AVR/pseudo/LDDWRdPtrQ.mir @@ -1,4 
+1,5 @@ -# RUN: llc -O0 %s -o - -march=avr | FileCheck %s +# RUN: llc -O0 -run-pass=avr-expand-pseudo %s -o - | FileCheck %s +# RUN: llc -O0 -run-pass=avr-expand-pseudo -mattr=avrtiny %s -o - | FileCheck %s --check-prefix=CHECK-TINY # This test checks the expansion of the 16-bit 'LDDWRdPtrQ' pseudo instruction. @@ -19,8 +20,15 @@ ; CHECK-LABEL: test_lddwrdptrq - ; CHECK: ldd r24, Z+10 - ; CHECK-NEXT: ldd r25, Z+11 + ; CHECK: $r24 = LDDRdPtrQ $r31r30, 10 + ; CHECK-NEXT: $r25 = LDDRdPtrQ $r31r30, 11 + + ; CHECK-TINY: $r30 = SUBIRdK $r30, 246, implicit-def $sreg + ; CHECK-TINY-NEXT: $r31 = SBCIRdK $r31, 255, implicit-def $sreg, implicit killed $sreg + ; CHECK-TINY-NEXT: $r24, $r31r30 = LDRdPtrPi killed $r31r30 + ; CHECK-TINY-NEXT: $r25, $r31r30 = LDRdPtrPi killed $r31r30 + ; CHECK-TINY-NEXT: $r30 = SUBIRdK $r30, 12, implicit-def $sreg + ; CHECK-TINY-NEXT: $r31 = SBCIRdK $r31, 0, implicit-def $sreg, implicit killed $sreg early-clobber $r25r24 = LDDWRdPtrQ undef $r31r30, 10 ... diff --git a/llvm/test/CodeGen/AVR/pseudo/LDWRdPtr.mir b/llvm/test/CodeGen/AVR/pseudo/LDWRdPtr.mir --- a/llvm/test/CodeGen/AVR/pseudo/LDWRdPtr.mir +++ b/llvm/test/CodeGen/AVR/pseudo/LDWRdPtr.mir @@ -1,4 +1,5 @@ # RUN: llc -O0 -run-pass=avr-expand-pseudo %s -o - | FileCheck %s +# RUN: llc -O0 -run-pass=avr-expand-pseudo -mattr=avrtiny %s -o - | FileCheck %s --check-prefix=CHECK-TINY # This test checks the expansion of the 16-bit LDWRdPtr pseudo instruction. @@ -21,5 +22,10 @@ ; CHECK: $r0 = LDRdPtr $r31r30 ; CHECK-NEXT: $r1 = LDDRdPtrQ $r31r30, 1 + ; CHECK-TINY: $r0, $r31r30 = LDRdPtrPi killed $r31r30 + ; CHECK-TINY-NEXT: $r1, $r31r30 = LDRdPtrPi killed $r31r30 + ; CHECK-TINY-NEXT: $r30 = SUBIRdK $r30, 2, implicit-def $sreg + ; CHECK-TINY-NEXT: $r31 = SBCIRdK $r31, 0, implicit-def $sreg, implicit killed $sreg + $r1r0 = LDWRdPtr $r31r30 ... 
diff --git a/llvm/test/CodeGen/AVR/pseudo/STDWPtrQRr.mir b/llvm/test/CodeGen/AVR/pseudo/STDWPtrQRr.mir --- a/llvm/test/CodeGen/AVR/pseudo/STDWPtrQRr.mir +++ b/llvm/test/CodeGen/AVR/pseudo/STDWPtrQRr.mir @@ -1,4 +1,5 @@ # RUN: llc -O0 -run-pass=avr-expand-pseudo -verify-machineinstrs %s -o - | FileCheck %s +# RUN: llc -O0 -run-pass=avr-expand-pseudo -verify-machineinstrs -mattr=avrtiny %s -o - | FileCheck %s --check-prefix=CHECK-TINY --- | target triple = "avr--" @@ -18,6 +19,12 @@ ; Small displacement (<63): ; CHECK: STDPtrQRr $r29r28, 3, $r0 ; CHECK-NEXT: STDPtrQRr $r29r28, 4, $r1 + ; CHECK-TINY: $r28 = SUBIRdK killed $r28, 253, implicit-def $sreg + ; CHECK-TINY-NEXT: $r29 = SBCIRdK killed $r29, 255, implicit-def $sreg, implicit killed $sreg + ; CHECK-TINY-NEXT: early-clobber $r29r28 = STPtrPiRr killed $r29r28, $r0, 0 + ; CHECK-TINY-NEXT: early-clobber $r29r28 = STPtrPiRr killed $r29r28, $r1, 0 + ; CHECK-TINY-NEXT: $r28 = SUBIRdK killed $r28, 5, implicit-def $sreg + ; CHECK-TINY-NEXT: $r29 = SBCIRdK killed $r29, 0, implicit-def $sreg, implicit killed $sreg STDWPtrQRr $r29r28, 3, $r1r0 ; Small displacement where the destination register is killed: @@ -36,31 +43,33 @@ STDWPtrQRr $r29r28, 62, $r1r0 ; Large displacement (>=63): - ; CHECK: PUSHRr $r28, implicit-def $sp, implicit $sp - ; CHECK-NEXT: PUSHRr $r29, implicit-def $sp, implicit $sp - ; CHECK-NEXT: $r28 = SUBIRdK killed $r28, 193, implicit-def $sreg + ; CHECK: $r28 = SUBIRdK killed $r28, 193, implicit-def $sreg ; CHECK-NEXT: $r29 = SBCIRdK killed $r29, 255, implicit-def $sreg, implicit killed $sreg - ; CHECK-NEXT: STPtrRr $r29r28, $r0 - ; CHECK-NEXT: STDPtrQRr $r29r28, 1, $r1 - ; CHECK-NEXT: $r29 = POPRd implicit-def $sp, implicit $sp - ; CHECK-NEXT: $r28 = POPRd implicit-def $sp, implicit $sp + ; CHECK-NEXT: $r29r28 = STPtrPiRr killed $r29r28, $r0, 0 + ; CHECK-NEXT: $r29r28 = STPtrPiRr killed $r29r28, $r1, 0 + ; CHECK-NEXT: $r28 = SUBIRdK killed $r28, 65, implicit-def $sreg + ; CHECK-NEXT: $r29 = SBCIRdK 
killed $r29, 0, implicit-def $sreg, implicit killed $sreg + ; CHECK-TINY: $r28 = SUBIRdK killed $r28, 193, implicit-def $sreg + ; CHECK-TINY-NEXT: $r29 = SBCIRdK killed $r29, 255, implicit-def $sreg, implicit killed $sreg + ; CHECK-TINY-NEXT: $r29r28 = STPtrPiRr killed $r29r28, $r0, 0 + ; CHECK-TINY-NEXT: $r29r28 = STPtrPiRr killed $r29r28, $r1, 0 + ; CHECK-TINY-NEXT: $r28 = SUBIRdK killed $r28, 65, implicit-def $sreg + ; CHECK-TINY-NEXT: $r29 = SBCIRdK killed $r29, 0, implicit-def $sreg, implicit killed $sreg STDWPtrQRr $r29r28, 63, $r1r0 ; Large displacement where the destination register is killed: ; CHECK: $r28 = SUBIRdK killed $r28, 193, implicit-def $sreg ; CHECK-NEXT: $r29 = SBCIRdK killed $r29, 255, implicit-def $sreg, implicit killed $sreg - ; CHECK-NEXT: STPtrRr $r29r28, $r0 - ; CHECK-NEXT: STDPtrQRr $r29r28, 1, $r1 + ; CHECK-NEXT: $r29r28 = STPtrPiRr killed $r29r28, $r0 + ; CHECK-NEXT: $r29r28 = STPtrPiRr killed $r29r28, $r1 STDWPtrQRr killed $r29r28, 63, $r1r0 ; Large displacement where the source register is killed: - ; CHECK: PUSHRr $r28, implicit-def $sp, implicit $sp - ; CHECK-NEXT: PUSHRr $r29, implicit-def $sp, implicit $sp - ; CHECK-NEXT: $r28 = SUBIRdK killed $r28, 193, implicit-def $sreg + ; CHECK: $r28 = SUBIRdK killed $r28, 193, implicit-def $sreg ; CHECK-NEXT: $r29 = SBCIRdK killed $r29, 255, implicit-def $sreg, implicit killed $sreg - ; CHECK-NEXT: STPtrRr $r29r28, killed $r0 - ; CHECK-NEXT: STDPtrQRr $r29r28, 1, killed $r1 - ; CHECK-NEXT: $r29 = POPRd implicit-def $sp, implicit $sp - ; CHECK-NEXT: $r28 = POPRd implicit-def $sp, implicit $sp + ; CHECK-NEXT: STPtrPiRr killed $r29r28, killed $r0, 0 + ; CHECK-NEXT: STPtrPiRr killed $r29r28, killed $r1, 0 + ; CHECK-NEXT: $r28 = SUBIRdK killed $r28, 65, implicit-def $sreg + ; CHECK-NEXT: $r29 = SBCIRdK killed $r29, 0, implicit-def $sreg, implicit killed $sreg STDWPtrQRr $r29r28, 63, killed $r1r0 ... 
diff --git a/llvm/test/CodeGen/AVR/pseudo/STWPtrRr.mir b/llvm/test/CodeGen/AVR/pseudo/STWPtrRr.mir --- a/llvm/test/CodeGen/AVR/pseudo/STWPtrRr.mir +++ b/llvm/test/CodeGen/AVR/pseudo/STWPtrRr.mir @@ -1,4 +1,5 @@ # RUN: llc -O0 -run-pass=avr-expand-pseudo %s -o - | FileCheck %s +# RUN: llc -O0 -run-pass=avr-expand-pseudo -mattr=avrtiny %s -o - | FileCheck %s --check-prefix=CHECK-TINY # This test checks the expansion of the 16-bit STSWRdK pseudo instruction. @@ -21,5 +22,10 @@ ; CHECK: STPtrRr $r31r30, $r16 ; CHECK-NEXT: STDPtrQRr $r31r30, 1, $r17 + ; CHECK-TINY: $r31r30 = STPtrPiRr killed $r31r30, $r16, 0 + ; CHECK-TINY-NEXT: $r31r30 = STPtrPiRr killed $r31r30, $r17, 0 + ; CHECK-TINY-NEXT: $r30 = SUBIRdK killed $r30, 2, implicit-def $sreg + ; CHECK-TINY-NEXT: $r31 = SBCIRdK killed $r31, 0, implicit-def $sreg, implicit killed $sreg + STWPtrRr $r31r30, $r17r16 ... diff --git a/llvm/test/CodeGen/AVR/return.ll b/llvm/test/CodeGen/AVR/return.ll --- a/llvm/test/CodeGen/AVR/return.ll +++ b/llvm/test/CodeGen/AVR/return.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mattr=avr6,sram -mtriple=avr < %s | FileCheck %s --check-prefix=AVR -; RUN: llc -mattr=tinyencoding -mtriple=avr < %s | FileCheck %s --check-prefix=TINY +; RUN: llc -mcpu=attiny10 -mtriple=avr < %s | FileCheck %s --check-prefix=TINY ;TODO: test returning byval structs ; TODO: test naked functions @@ -136,10 +136,26 @@ ; TINY-NEXT: push r29 ; TINY-NEXT: in r28, 61 ; TINY-NEXT: in r29, 62 -; TINY-NEXT: ldd r22, Y+9 -; TINY-NEXT: ldd r23, Y+10 -; TINY-NEXT: ldd r24, Y+11 -; TINY-NEXT: ldd r25, Y+12 +; TINY-NEXT: in r16, 63 +; TINY-NEXT: subi r28, 247 +; TINY-NEXT: sbci r29, 255 +; TINY-NEXT: ld r22, Y+ +; TINY-NEXT: ld r23, Y+ +; TINY-NEXT: subi r28, 2 +; TINY-NEXT: sbci r29, 0 +; TINY-NEXT: subi r28, 9 +; TINY-NEXT: sbci r29, 0 +; TINY-NEXT: out 63, r16 +; TINY-NEXT: in r16, 63 +; TINY-NEXT: subi r28, 245 +; TINY-NEXT: sbci r29, 255 +; TINY-NEXT: ld r24, 
Y+ +; TINY-NEXT: ld r25, Y+ +; TINY-NEXT: subi r28, 2 +; TINY-NEXT: sbci r29, 0 +; TINY-NEXT: subi r28, 11 +; TINY-NEXT: sbci r29, 0 +; TINY-NEXT: out 63, r16 ; TINY-NEXT: pop r29 ; TINY-NEXT: pop r28 ; TINY-NEXT: ret @@ -165,20 +181,32 @@ ; TINY-NEXT: ldi r21, 190 ; TINY-NEXT: mov r30, r24 ; TINY-NEXT: mov r31, r25 -; TINY-NEXT: std Z+6, r20 -; TINY-NEXT: std Z+7, r21 +; TINY-NEXT: subi r30, 250 +; TINY-NEXT: sbci r31, 255 +; TINY-NEXT: st Z+, r20 +; TINY-NEXT: st Z+, r21 +; TINY-NEXT: subi r30, 8 +; TINY-NEXT: sbci r31, 0 ; TINY-NEXT: ldi r24, 25 ; TINY-NEXT: ldi r25, 22 -; TINY-NEXT: std Z+4, r24 -; TINY-NEXT: std Z+5, r25 +; TINY-NEXT: subi r30, 252 +; TINY-NEXT: sbci r31, 255 +; TINY-NEXT: st Z+, r24 +; TINY-NEXT: st Z+, r25 +; TINY-NEXT: subi r30, 6 +; TINY-NEXT: sbci r31, 0 ; TINY-NEXT: ldi r24, 104 ; TINY-NEXT: ldi r25, 37 -; TINY-NEXT: std Z+2, r24 -; TINY-NEXT: std Z+3, r25 +; TINY-NEXT: subi r30, 254 +; TINY-NEXT: sbci r31, 255 +; TINY-NEXT: st Z+, r24 +; TINY-NEXT: st Z+, r25 +; TINY-NEXT: subi r30, 4 +; TINY-NEXT: sbci r31, 0 ; TINY-NEXT: ldi r24, 204 ; TINY-NEXT: ldi r25, 204 -; TINY-NEXT: st Z, r24 -; TINY-NEXT: std Z+1, r25 +; TINY-NEXT: st Z+, r24 +; TINY-NEXT: st Z+, r25 ; TINY-NEXT: ret ret i64 13757395258967641292 } @@ -194,24 +222,68 @@ ; TINY-NEXT: push r29 ; TINY-NEXT: in r28, 61 ; TINY-NEXT: in r29, 62 -; TINY-NEXT: ldd r20, Y+11 -; TINY-NEXT: ldd r21, Y+12 +; TINY-NEXT: in r16, 63 +; TINY-NEXT: subi r28, 245 +; TINY-NEXT: sbci r29, 255 +; TINY-NEXT: ld r20, Y+ +; TINY-NEXT: ld r21, Y+ +; TINY-NEXT: subi r28, 2 +; TINY-NEXT: sbci r29, 0 +; TINY-NEXT: subi r28, 11 +; TINY-NEXT: sbci r29, 0 +; TINY-NEXT: out 63, r16 ; TINY-NEXT: mov r30, r24 ; TINY-NEXT: mov r31, r25 -; TINY-NEXT: std Z+6, r20 -; TINY-NEXT: std Z+7, r21 -; TINY-NEXT: ldd r24, Y+9 -; TINY-NEXT: ldd r25, Y+10 -; TINY-NEXT: std Z+4, r24 -; TINY-NEXT: std Z+5, r25 -; TINY-NEXT: ldd r24, Y+7 -; TINY-NEXT: ldd r25, Y+8 -; TINY-NEXT: std Z+2, r24 -; TINY-NEXT: std Z+3, r25 -; 
TINY-NEXT: ldd r24, Y+5 -; TINY-NEXT: ldd r25, Y+6 -; TINY-NEXT: st Z, r24 -; TINY-NEXT: std Z+1, r25 +; TINY-NEXT: subi r30, 250 +; TINY-NEXT: sbci r31, 255 +; TINY-NEXT: st Z+, r20 +; TINY-NEXT: st Z+, r21 +; TINY-NEXT: subi r30, 8 +; TINY-NEXT: sbci r31, 0 +; TINY-NEXT: in r16, 63 +; TINY-NEXT: subi r28, 247 +; TINY-NEXT: sbci r29, 255 +; TINY-NEXT: ld r24, Y+ +; TINY-NEXT: ld r25, Y+ +; TINY-NEXT: subi r28, 2 +; TINY-NEXT: sbci r29, 0 +; TINY-NEXT: subi r28, 9 +; TINY-NEXT: sbci r29, 0 +; TINY-NEXT: out 63, r16 +; TINY-NEXT: subi r30, 252 +; TINY-NEXT: sbci r31, 255 +; TINY-NEXT: st Z+, r24 +; TINY-NEXT: st Z+, r25 +; TINY-NEXT: subi r30, 6 +; TINY-NEXT: sbci r31, 0 +; TINY-NEXT: in r16, 63 +; TINY-NEXT: subi r28, 249 +; TINY-NEXT: sbci r29, 255 +; TINY-NEXT: ld r24, Y+ +; TINY-NEXT: ld r25, Y+ +; TINY-NEXT: subi r28, 2 +; TINY-NEXT: sbci r29, 0 +; TINY-NEXT: subi r28, 7 +; TINY-NEXT: sbci r29, 0 +; TINY-NEXT: out 63, r16 +; TINY-NEXT: subi r30, 254 +; TINY-NEXT: sbci r31, 255 +; TINY-NEXT: st Z+, r24 +; TINY-NEXT: st Z+, r25 +; TINY-NEXT: subi r30, 4 +; TINY-NEXT: sbci r31, 0 +; TINY-NEXT: in r16, 63 +; TINY-NEXT: subi r28, 251 +; TINY-NEXT: sbci r29, 255 +; TINY-NEXT: ld r24, Y+ +; TINY-NEXT: ld r25, Y+ +; TINY-NEXT: subi r28, 2 +; TINY-NEXT: sbci r29, 0 +; TINY-NEXT: subi r28, 5 +; TINY-NEXT: sbci r29, 0 +; TINY-NEXT: out 63, r16 +; TINY-NEXT: st Z+, r24 +; TINY-NEXT: st Z+, r25 ; TINY-NEXT: pop r29 ; TINY-NEXT: pop r28 ; TINY-NEXT: ret @@ -243,24 +315,68 @@ ; TINY-NEXT: push r29 ; TINY-NEXT: in r28, 61 ; TINY-NEXT: in r29, 62 -; TINY-NEXT: ldd r20, Y+27 -; TINY-NEXT: ldd r21, Y+28 +; TINY-NEXT: in r16, 63 +; TINY-NEXT: subi r28, 229 +; TINY-NEXT: sbci r29, 255 +; TINY-NEXT: ld r20, Y+ +; TINY-NEXT: ld r21, Y+ +; TINY-NEXT: subi r28, 2 +; TINY-NEXT: sbci r29, 0 +; TINY-NEXT: subi r28, 27 +; TINY-NEXT: sbci r29, 0 +; TINY-NEXT: out 63, r16 ; TINY-NEXT: mov r30, r24 ; TINY-NEXT: mov r31, r25 -; TINY-NEXT: std Z+6, r20 -; TINY-NEXT: std Z+7, r21 -; TINY-NEXT: 
ldd r24, Y+25 -; TINY-NEXT: ldd r25, Y+26 -; TINY-NEXT: std Z+4, r24 -; TINY-NEXT: std Z+5, r25 -; TINY-NEXT: ldd r24, Y+23 -; TINY-NEXT: ldd r25, Y+24 -; TINY-NEXT: std Z+2, r24 -; TINY-NEXT: std Z+3, r25 -; TINY-NEXT: ldd r24, Y+21 -; TINY-NEXT: ldd r25, Y+22 -; TINY-NEXT: st Z, r24 -; TINY-NEXT: std Z+1, r25 +; TINY-NEXT: subi r30, 250 +; TINY-NEXT: sbci r31, 255 +; TINY-NEXT: st Z+, r20 +; TINY-NEXT: st Z+, r21 +; TINY-NEXT: subi r30, 8 +; TINY-NEXT: sbci r31, 0 +; TINY-NEXT: in r16, 63 +; TINY-NEXT: subi r28, 231 +; TINY-NEXT: sbci r29, 255 +; TINY-NEXT: ld r24, Y+ +; TINY-NEXT: ld r25, Y+ +; TINY-NEXT: subi r28, 2 +; TINY-NEXT: sbci r29, 0 +; TINY-NEXT: subi r28, 25 +; TINY-NEXT: sbci r29, 0 +; TINY-NEXT: out 63, r16 +; TINY-NEXT: subi r30, 252 +; TINY-NEXT: sbci r31, 255 +; TINY-NEXT: st Z+, r24 +; TINY-NEXT: st Z+, r25 +; TINY-NEXT: subi r30, 6 +; TINY-NEXT: sbci r31, 0 +; TINY-NEXT: in r16, 63 +; TINY-NEXT: subi r28, 233 +; TINY-NEXT: sbci r29, 255 +; TINY-NEXT: ld r24, Y+ +; TINY-NEXT: ld r25, Y+ +; TINY-NEXT: subi r28, 2 +; TINY-NEXT: sbci r29, 0 +; TINY-NEXT: subi r28, 23 +; TINY-NEXT: sbci r29, 0 +; TINY-NEXT: out 63, r16 +; TINY-NEXT: subi r30, 254 +; TINY-NEXT: sbci r31, 255 +; TINY-NEXT: st Z+, r24 +; TINY-NEXT: st Z+, r25 +; TINY-NEXT: subi r30, 4 +; TINY-NEXT: sbci r31, 0 +; TINY-NEXT: in r16, 63 +; TINY-NEXT: subi r28, 235 +; TINY-NEXT: sbci r29, 255 +; TINY-NEXT: ld r24, Y+ +; TINY-NEXT: ld r25, Y+ +; TINY-NEXT: subi r28, 2 +; TINY-NEXT: sbci r29, 0 +; TINY-NEXT: subi r28, 21 +; TINY-NEXT: sbci r29, 0 +; TINY-NEXT: out 63, r16 +; TINY-NEXT: st Z+, r24 +; TINY-NEXT: st Z+, r25 ; TINY-NEXT: pop r29 ; TINY-NEXT: pop r28 ; TINY-NEXT: ret @@ -288,10 +404,26 @@ ; TINY-NEXT: push r29 ; TINY-NEXT: in r28, 61 ; TINY-NEXT: in r29, 62 -; TINY-NEXT: ldd r22, Y+13 -; TINY-NEXT: ldd r23, Y+14 -; TINY-NEXT: ldd r24, Y+15 -; TINY-NEXT: ldd r25, Y+16 +; TINY-NEXT: in r16, 63 +; TINY-NEXT: subi r28, 243 +; TINY-NEXT: sbci r29, 255 +; TINY-NEXT: ld r22, Y+ +; 
TINY-NEXT: ld r23, Y+ +; TINY-NEXT: subi r28, 2 +; TINY-NEXT: sbci r29, 0 +; TINY-NEXT: subi r28, 13 +; TINY-NEXT: sbci r29, 0 +; TINY-NEXT: out 63, r16 +; TINY-NEXT: in r16, 63 +; TINY-NEXT: subi r28, 241 +; TINY-NEXT: sbci r29, 255 +; TINY-NEXT: ld r24, Y+ +; TINY-NEXT: ld r25, Y+ +; TINY-NEXT: subi r28, 2 +; TINY-NEXT: sbci r29, 0 +; TINY-NEXT: subi r28, 15 +; TINY-NEXT: sbci r29, 0 +; TINY-NEXT: out 63, r16 ; TINY-NEXT: pop r29 ; TINY-NEXT: pop r28 ; TINY-NEXT: ret