diff --git a/llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp b/llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp --- a/llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp +++ b/llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp @@ -50,11 +50,6 @@ const AVRRegisterInfo *TRI; const TargetInstrInfo *TII; - /// The register to be used for temporary storage. - const Register SCRATCH_REGISTER = AVR::R0; - /// The register that will always contain zero. - const Register ZERO_REGISTER = AVR::R1; - bool expandMBB(Block &MBB); bool expandMI(Block &MBB, BlockIt MBBI); template bool expand(Block &MBB, BlockIt MBBI); @@ -442,6 +437,7 @@ template <> bool AVRExpandPseudo::expand(Block &MBB, BlockIt MBBI) { + const AVRSubtarget &STI = MBB.getParent()->getSubtarget(); MachineInstr &MI = *MBBI; Register DstLoReg, DstHiReg; Register DstReg = MI.getOperand(0).getReg(); @@ -468,7 +464,7 @@ buildMI(MBB, MBBI, AVR::SBCRdRr) .addReg(DstHiReg, RegState::Define | getDeadRegState(DstIsDead)) .addReg(DstHiReg, getKillRegState(DstIsKill)) - .addReg(ZERO_REGISTER); + .addReg(STI.getZeroRegister()); if (ImpIsDead) MISBCI->getOperand(3).setIsDead(); // SREG is always implicitly killed @@ -889,7 +885,7 @@ // Store the SREG. buildMI(MBB, MBBI, AVR::INRdA) - .addReg(SCRATCH_REGISTER, RegState::Define) + .addReg(STI.getTmpRegister(), RegState::Define) .addImm(STI.getIORegSREG()); // Disable exceptions. @@ -900,7 +896,7 @@ // Restore the status reg. buildMI(MBB, MBBI, AVR::OUTARr) .addImm(STI.getIORegSREG()) - .addReg(SCRATCH_REGISTER); + .addReg(STI.getTmpRegister()); MI.eraseFromParent(); return true; @@ -1325,6 +1321,7 @@ // multiple registers, but when we actually need to rotate stuff, we have // to explicitly add the carry bit. + const AVRSubtarget &STI = MBB.getParent()->getSubtarget(); MachineInstr &MI = *MBBI; unsigned OpShift, OpCarry; Register DstReg = MI.getOperand(0).getReg(); @@ -1346,7 +1343,7 @@ auto MIB = buildMI(MBB, MBBI, OpCarry) .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead)) .addReg(DstReg, getKillRegState(DstIsKill)) - .addReg(ZERO_REGISTER); + .addReg(STI.getZeroRegister()); MIB->getOperand(3).setIsDead(); // SREG is always dead MIB->getOperand(4).setIsKill(); // SREG is always implicitly killed @@ -2360,7 +2357,7 @@ TRI->splitReg(SrcReg, SrcLoReg, SrcHiReg); buildMI(MBB, MBBI, AVR::INRdA) - .addReg(AVR::R0, RegState::Define) + .addReg(STI.getTmpRegister(), RegState::Define) .addImm(STI.getIORegSREG()) .setMIFlags(Flags); @@ -2373,7 +2370,7 @@ buildMI(MBB, MBBI, AVR::OUTARr) .addImm(STI.getIORegSREG()) - .addReg(AVR::R0, RegState::Kill) + .addReg(STI.getTmpRegister(), RegState::Kill) .setMIFlags(Flags); buildMI(MBB, MBBI, AVR::OUTARr) diff --git a/llvm/lib/Target/AVR/AVRFrameLowering.cpp b/llvm/lib/Target/AVR/AVRFrameLowering.cpp --- a/llvm/lib/Target/AVR/AVRFrameLowering.cpp +++ b/llvm/lib/Target/AVR/AVRFrameLowering.cpp @@ -70,23 +70,26 @@ // handlers before saving any other registers. if (AFI->isInterruptOrSignalHandler()) { BuildMI(MBB, MBBI, DL, TII.get(AVR::PUSHRr)) - .addReg(AVR::R0, RegState::Kill) + .addReg(STI.getTmpRegister(), RegState::Kill) .setMIFlag(MachineInstr::FrameSetup); - BuildMI(MBB, MBBI, DL, TII.get(AVR::INRdA), AVR::R0) + BuildMI(MBB, MBBI, DL, TII.get(AVR::INRdA), STI.getTmpRegister()) .addImm(STI.getIORegSREG()) .setMIFlag(MachineInstr::FrameSetup); BuildMI(MBB, MBBI, DL, TII.get(AVR::PUSHRr)) - .addReg(AVR::R0, RegState::Kill) + .addReg(STI.getTmpRegister(), RegState::Kill) .setMIFlag(MachineInstr::FrameSetup); - if (!MRI.reg_empty(AVR::R1)) { + if (!STI.hasTinyEncoding() && !MRI.reg_empty(STI.getZeroRegister())) { + // Saving and zeroing the zero register is needed on non-avrtiny chips + // because the mul instruction might destroy it. On avrtiny chips, the + // fixed zero register is never modified. BuildMI(MBB, MBBI, DL, TII.get(AVR::PUSHRr)) - .addReg(AVR::R1, RegState::Kill) + .addReg(STI.getZeroRegister(), RegState::Kill) .setMIFlag(MachineInstr::FrameSetup); BuildMI(MBB, MBBI, DL, TII.get(AVR::EORRdRr)) - .addReg(AVR::R1, RegState::Define) - .addReg(AVR::R1, RegState::Kill) - .addReg(AVR::R1, RegState::Kill) + .addReg(STI.getZeroRegister(), RegState::Define) + .addReg(STI.getZeroRegister(), RegState::Kill) + .addReg(STI.getZeroRegister(), RegState::Kill) .setMIFlag(MachineInstr::FrameSetup); } } @@ -149,14 +152,15 @@ // Emit special epilogue code to restore R1, R0 and SREG in interrupt/signal // handlers at the very end of the function, just before reti. if (AFI->isInterruptOrSignalHandler()) { - if (!MRI.reg_empty(AVR::R1)) { - BuildMI(MBB, MBBI, DL, TII.get(AVR::POPRd), AVR::R1); + if (!STI.hasTinyEncoding() && !MRI.reg_empty(STI.getZeroRegister())) { + // See emitPrologue: this is only needed on non-avrtiny chips. + BuildMI(MBB, MBBI, DL, TII.get(AVR::POPRd), STI.getZeroRegister()); } - BuildMI(MBB, MBBI, DL, TII.get(AVR::POPRd), AVR::R0); + BuildMI(MBB, MBBI, DL, TII.get(AVR::POPRd), STI.getTmpRegister()); BuildMI(MBB, MBBI, DL, TII.get(AVR::OUTARr)) .addImm(STI.getIORegSREG()) - .addReg(AVR::R0, RegState::Kill); - BuildMI(MBB, MBBI, DL, TII.get(AVR::POPRd), AVR::R0); + .addReg(STI.getTmpRegister(), RegState::Kill); + BuildMI(MBB, MBBI, DL, TII.get(AVR::POPRd), STI.getTmpRegister()); } } diff --git a/llvm/lib/Target/AVR/AVRISelLowering.h b/llvm/lib/Target/AVR/AVRISelLowering.h --- a/llvm/lib/Target/AVR/AVRISelLowering.h +++ b/llvm/lib/Target/AVR/AVRISelLowering.h @@ -192,8 +192,8 @@ MachineBasicBlock *insertWideShift(MachineInstr &MI, MachineBasicBlock *BB) const; MachineBasicBlock *insertMul(MachineInstr &MI, MachineBasicBlock *BB) const; - MachineBasicBlock *insertCopyR1(MachineInstr &MI, - MachineBasicBlock *BB) const; + MachineBasicBlock *insertCopyZero(MachineInstr &MI, + MachineBasicBlock *BB) const; MachineBasicBlock *insertAtomicArithmeticOp(MachineInstr &MI, MachineBasicBlock *BB, unsigned Opcode, int Width) const; diff --git a/llvm/lib/Target/AVR/AVRISelLowering.cpp b/llvm/lib/Target/AVR/AVRISelLowering.cpp --- a/llvm/lib/Target/AVR/AVRISelLowering.cpp +++ b/llvm/lib/Target/AVR/AVRISelLowering.cpp @@ -878,12 +878,12 @@ MachinePointerInfo(SV)); } -// Modify the existing ISD::INLINEASM node to add the implicit register r1. +// Modify the existing ISD::INLINEASM node to add the implicit zero register. SDValue AVRTargetLowering::LowerINLINEASM(SDValue Op, SelectionDAG &DAG) const { - SDValue R1Reg = DAG.getRegister(AVR::R1, MVT::i8); - if (Op.getOperand(Op.getNumOperands() - 1) == R1Reg || - Op.getOperand(Op.getNumOperands() - 2) == R1Reg) { - // R1 has already been added. Don't add it again. + SDValue ZeroReg = DAG.getRegister(Subtarget.getZeroRegister(), MVT::i8); + if (Op.getOperand(Op.getNumOperands() - 1) == ZeroReg || + Op.getOperand(Op.getNumOperands() - 2) == ZeroReg) { + // Zero register has already been added. Don't add it again. // If this isn't handled, we get called over and over again. return Op; } @@ -892,8 +892,8 @@ // with some edits. // Add the following operands at the end (but before the glue node, if it's // there): - // - The flags of the implicit R1 register operand. - // - The implicit R1 register operand itself. + // - The flags of the implicit zero register operand. + // - The implicit zero register operand itself. SDLoc dl(Op); SmallVector Ops; SDNode *N = Op.getNode(); @@ -910,13 +910,13 @@ } unsigned Flags = InlineAsm::getFlagWord(InlineAsm::Kind_RegUse, 1); Ops.push_back(DAG.getTargetConstant(Flags, dl, MVT::i32)); - Ops.push_back(R1Reg); + Ops.push_back(ZeroReg); if (Glue) { Ops.push_back(Glue); } - // Replace the current INLINEASM node with a new one that has R1 as implicit - // parameter. + // Replace the current INLINEASM node with a new one that has the zero + // register as implicit parameter. SDValue New = DAG.getNode(N->getOpcode(), dl, N->getVTList(), Ops); DAG.ReplaceAllUsesOfValueWith(Op, New); DAG.ReplaceAllUsesOfValueWith(Op.getValue(1), New.getValue(1)); @@ -1541,9 +1541,9 @@ Ops.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType())); } - // The R1 register must be passed as an implicit register so that R1 is - // correctly zeroed in interrupts. - Ops.push_back(DAG.getRegister(AVR::R1, MVT::i8)); + // The zero register (usually R1) must be passed as an implicit register so + // that this register is correctly zeroed in interrupts. + Ops.push_back(DAG.getRegister(Subtarget.getZeroRegister(), MVT::i8)); // Add a register mask operand representing the call-preserved registers. const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo(); @@ -1666,11 +1666,11 @@ const AVRMachineFunctionInfo *AFI = MF.getInfo(); if (!AFI->isInterruptOrSignalHandler()) { - // The return instruction has an implicit R1 operand: it must contain zero - // on return. - // This is not needed in interrupts however, where R1 is handled specially - // (only pushed/popped when needed). - RetOps.push_back(DAG.getRegister(AVR::R1, MVT::i8)); + // The return instruction has an implicit zero register operand: it must + // contain zero on return. + // This is not needed in interrupts however, where the zero register is + // handled specially (only pushed/popped when needed). + RetOps.push_back(DAG.getRegister(Subtarget.getZeroRegister(), MVT::i8)); } unsigned RetOpc = @@ -1836,6 +1836,7 @@ MutableArrayRef> Regs, int64_t ShiftAmt, bool ArithmeticShift) { const TargetInstrInfo &TII = *BB->getParent()->getSubtarget().getInstrInfo(); + const AVRSubtarget &STI = BB->getParent()->getSubtarget(); MachineRegisterInfo &MRI = BB->getParent()->getRegInfo(); DebugLoc dl = MI.getDebugLoc(); @@ -1859,7 +1860,8 @@ // Create zero register. Register Zero = MRI.createVirtualRegister(&AVR::GPR8RegClass); - BuildMI(*BB, MI, dl, TII.get(AVR::COPY), Zero).addReg(AVR::R1); + BuildMI(*BB, MI, dl, TII.get(AVR::COPY), Zero) + .addReg(STI.getZeroRegister()); // Rotate the least significant bit from the carry bit into a new register // (that starts out zero). @@ -1912,7 +1914,8 @@ } else { // Create a new zero register for zero extending. ExtMore = MRI.createVirtualRegister(&AVR::GPR8RegClass); - BuildMI(*BB, MI, dl, TII.get(AVR::COPY), ExtMore).addReg(AVR::R1); + BuildMI(*BB, MI, dl, TII.get(AVR::COPY), ExtMore) + .addReg(STI.getZeroRegister()); // Rotate most significant bit into a new register (that starts out zero). BuildMI(*BB, MI, dl, TII.get(AVR::ADCRdRr), Ext) .addReg(ExtMore) @@ -1956,7 +1959,7 @@ // Zero the least significant register. Register Out = MRI.createVirtualRegister(&AVR::GPR8RegClass); - BuildMI(*BB, MI, dl, TII.get(AVR::COPY), Out).addReg(AVR::R1); + BuildMI(*BB, MI, dl, TII.get(AVR::COPY), Out).addReg(STI.getZeroRegister()); Regs[Regs.size() - 1] = std::pair(Out, 0); // Continue shifts with the leftover registers. @@ -1983,7 +1986,8 @@ .addReg(Tmp) .addReg(Tmp); } else { - BuildMI(*BB, MI, dl, TII.get(AVR::COPY), ShrExtendReg).addReg(AVR::R1); + BuildMI(*BB, MI, dl, TII.get(AVR::COPY), ShrExtendReg) + .addReg(STI.getZeroRegister()); } } Regs[0] = std::pair(ShrExtendReg, 0); @@ -2183,14 +2187,15 @@ return BB; } -// Insert a read from R1, which almost always contains the value 0. +// Insert a read from the zero register. MachineBasicBlock * -AVRTargetLowering::insertCopyR1(MachineInstr &MI, MachineBasicBlock *BB) const { +AVRTargetLowering::insertCopyZero(MachineInstr &MI, + MachineBasicBlock *BB) const { const TargetInstrInfo &TII = *Subtarget.getInstrInfo(); MachineBasicBlock::iterator I(MI); BuildMI(*BB, I, MI.getDebugLoc(), TII.get(AVR::COPY)) .add(MI.getOperand(0)) - .addReg(AVR::R1); + .addReg(Subtarget.getZeroRegister()); MI.eraseFromParent(); return BB; } @@ -2202,7 +2207,6 @@ MachineRegisterInfo &MRI = BB->getParent()->getRegInfo(); const TargetInstrInfo &TII = *Subtarget.getInstrInfo(); MachineBasicBlock::iterator I(MI); - const Register SCRATCH_REGISTER = AVR::R0; DebugLoc dl = MI.getDebugLoc(); // Example instruction sequence, for an atomic 8-bit add: @@ -2220,7 +2224,7 @@ unsigned StoreOpcode = (Width == 8) ? AVR::STPtrRr : AVR::STWPtrRr; // Disable interrupts. - BuildMI(*BB, I, dl, TII.get(AVR::INRdA), SCRATCH_REGISTER) + BuildMI(*BB, I, dl, TII.get(AVR::INRdA), Subtarget.getTmpRegister()) .addImm(Subtarget.getIORegSREG()); BuildMI(*BB, I, dl, TII.get(AVR::BCLRs)).addImm(7); @@ -2242,7 +2246,7 @@ // Restore interrupts. BuildMI(*BB, I, dl, TII.get(AVR::OUTARr)) .addImm(Subtarget.getIORegSREG()) - .addReg(SCRATCH_REGISTER); + .addReg(Subtarget.getTmpRegister()); // Remove the pseudo instruction. MI.eraseFromParent(); @@ -2275,8 +2279,8 @@ case AVR::MULRdRr: case AVR::MULSRdRr: return insertMul(MI, MBB); - case AVR::CopyR1: - return insertCopyR1(MI, MBB); + case AVR::CopyZero: + return insertCopyZero(MI, MBB); case AVR::AtomicLoadAdd8: return insertAtomicArithmeticOp(MI, MBB, AVR::ADDRdRr, 8); case AVR::AtomicLoadAdd16: @@ -2581,7 +2585,8 @@ break; case 't': // Temporary register: r0. if (VT == MVT::i8) - return std::make_pair(unsigned(AVR::R0), &AVR::GPR8RegClass); + return std::make_pair(unsigned(Subtarget.getTmpRegister()), + &AVR::GPR8RegClass); break; case 'w': // Special upper register pairs: r24, r26, r28, r30. if (VT == MVT::i8 || VT == MVT::i16) diff --git a/llvm/lib/Target/AVR/AVRInstrInfo.td b/llvm/lib/Target/AVR/AVRInstrInfo.td --- a/llvm/lib/Target/AVR/AVRInstrInfo.td +++ b/llvm/lib/Target/AVR/AVRInstrInfo.td @@ -918,6 +918,9 @@ // neg Rd+1 // neg Rd // sbc Rd+1, r1 + // + // Note: this pseudo instruction actually uses R17 on avrtiny. + // This is fine, because R17 is never modified on avrtiny. let Uses = [R1] in def NEGWRd : Pseudo<(outs DREGS : $rd), @@ -1990,6 +1993,8 @@ def ASRWLoRd : Pseudo<(outs DREGS:$rd), (ins DREGS:$src), "asrwlo\t$rd", [(set i16:$rd, (AVRasrlo i16:$src)), (implicit SREG)]>; + // Note: this pseudo instruction actually uses R17 on avrtiny. + // This is fine, because R17 is never modified on avrtiny. let Uses = [R1] in def ROLBRd : Pseudo<(outs GPR8 : $rd), @@ -2405,9 +2410,9 @@ "# Asr32 PSEUDO", [(set i16:$dstlo, i16:$dsthi, (AVRasrw i16:$srclo, i16:$srchi, i8:$cnt))]>; -// lowered to a copy from R1, which contains the value zero. +// lowered to a copy from the zero register. let usesCustomInserter=1 in -def CopyR1 : Pseudo<(outs GPR8:$rd), (ins), "clrz\t$rd", [(set i8:$rd, 0)]>; +def CopyZero : Pseudo<(outs GPR8:$rd), (ins), "clrz\t$rd", [(set i8:$rd, 0)]>; //===----------------------------------------------------------------------===// // Non-Instruction Patterns diff --git a/llvm/lib/Target/AVR/AVRRegisterInfo.cpp b/llvm/lib/Target/AVR/AVRRegisterInfo.cpp --- a/llvm/lib/Target/AVR/AVRRegisterInfo.cpp +++ b/llvm/lib/Target/AVR/AVRRegisterInfo.cpp @@ -236,7 +236,7 @@ // a compare and branch, invalidating the contents of SREG set by the // compare instruction because of the add/sub pairs. Conservatively save and // restore SREG before and after each add/sub pair. - BuildMI(MBB, II, dl, TII.get(AVR::INRdA), AVR::R0) + BuildMI(MBB, II, dl, TII.get(AVR::INRdA), STI.getTmpRegister()) .addImm(STI.getIORegSREG()); MachineInstr *New = BuildMI(MBB, II, dl, TII.get(AddOpc), AVR::R29R28) @@ -247,7 +247,7 @@ // Restore SREG. BuildMI(MBB, std::next(II), dl, TII.get(AVR::OUTARr)) .addImm(STI.getIORegSREG()) - .addReg(AVR::R0, RegState::Kill); + .addReg(STI.getTmpRegister(), RegState::Kill); // No need to set SREG as dead here otherwise if the next instruction is a // cond branch it will be using a dead register. diff --git a/llvm/lib/Target/AVR/AVRSubtarget.h b/llvm/lib/Target/AVR/AVRSubtarget.h --- a/llvm/lib/Target/AVR/AVRSubtarget.h +++ b/llvm/lib/Target/AVR/AVRSubtarget.h @@ -21,6 +21,7 @@ #include "AVRISelLowering.h" #include "AVRInstrInfo.h" #include "AVRSelectionDAGInfo.h" +#include "MCTargetDesc/AVRMCTargetDesc.h" #define GET_SUBTARGETINFO_HEADER #include "AVRGenSubtargetInfo.inc" @@ -102,6 +103,13 @@ int getRegTmpIndex() const { return hasTinyEncoding() ? 16 : 0; } int getRegZeroIndex() const { return hasTinyEncoding() ? 17 : 1; } + Register getTmpRegister() const { + return hasTinyEncoding() ? AVR::R16 : AVR::R0; + } + Register getZeroRegister() const { + return hasTinyEncoding() ? AVR::R17 : AVR::R1; + } + private: /// The ELF e_flags architecture. unsigned ELFArch; diff --git a/llvm/test/CodeGen/AVR/features/avr-tiny.ll b/llvm/test/CodeGen/AVR/features/avr-tiny.ll --- a/llvm/test/CodeGen/AVR/features/avr-tiny.ll +++ b/llvm/test/CodeGen/AVR/features/avr-tiny.ll @@ -1,9 +1,90 @@ -; RUN: llc -mattr=avrtiny -O0 < %s -march=avr | FileCheck %s +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mattr=avrtiny -O0 < %s -mtriple=avr | FileCheck %s define i16 @reg_copy16(i16, i16 %a) { -; CHECK-LABEL: reg_copy16 -; CHECK: mov r24, r22 -; CHECK: mov r25, r23 - +; CHECK-LABEL: reg_copy16: +; CHECK: ; %bb.0: +; CHECK-NEXT: mov r24, r22 +; CHECK-NEXT: mov r25, r23 +; CHECK-NEXT: ret ret i16 %a } + +define i8 @return_zero() { +; CHECK-LABEL: return_zero: +; CHECK: ; %bb.0: +; CHECK-NEXT: mov r24, r17 +; CHECK-NEXT: ret + ret i8 0 +} + +define i8 @atomic_load8(i8* %foo) { +; CHECK-LABEL: atomic_load8: +; CHECK: ; %bb.0: +; CHECK-NEXT: mov r26, r24 +; CHECK-NEXT: mov r27, r25 +; CHECK-NEXT: in r16, 63 +; CHECK-NEXT: cli +; CHECK-NEXT: ld r24, X +; CHECK-NEXT: out 63, r16 +; CHECK-NEXT: ret + %val = load atomic i8, i8* %foo unordered, align 1 + ret i8 %val +} + +define avr_signalcc void @signal_handler_with_asm() { +; CHECK-LABEL: signal_handler_with_asm: +; CHECK: ; %bb.0: +; CHECK-NEXT: push r16 +; CHECK-NEXT: in r16, 63 +; CHECK-NEXT: push r16 +; CHECK-NEXT: push r24 +; CHECK-NEXT: ldi r24, 3 +; CHECK-NEXT: ;APP +; CHECK-NEXT: mov r24, r24 +; CHECK-NEXT: ;NO_APP +; CHECK-NEXT: pop r24 +; CHECK-NEXT: pop r16 +; CHECK-NEXT: out 63, r16 +; CHECK-NEXT: pop r16 +; CHECK-NEXT: reti + call i8 asm sideeffect "mov $0, $1", "=r,r"(i8 3) nounwind + ret void +} + +declare void @foo() + +define avr_signalcc void @signal_handler_with_call() { +; CHECK-LABEL: signal_handler_with_call: +; CHECK: ; %bb.0: +; CHECK-NEXT: push r16 +; CHECK-NEXT: in r16, 63 +; CHECK-NEXT: push r16 +; CHECK-NEXT: push r20 +; CHECK-NEXT: push r21 +; CHECK-NEXT: push r22 +; CHECK-NEXT: push r23 +; CHECK-NEXT: push r24 +; CHECK-NEXT: push r25 +; CHECK-NEXT: push r26 +; CHECK-NEXT: push r27 +; CHECK-NEXT: push r30 +; CHECK-NEXT: push r31 +; CHECK-NEXT: rcall foo +; CHECK-NEXT: pop r31 +; CHECK-NEXT: pop r30 +; CHECK-NEXT: pop r27 +; CHECK-NEXT: pop r26 +; CHECK-NEXT: pop r25 +; CHECK-NEXT: pop r24 +; CHECK-NEXT: pop r23 +; CHECK-NEXT: pop r22 +; CHECK-NEXT: pop r21 +; CHECK-NEXT: pop r20 +; CHECK-NEXT: pop r16 +; CHECK-NEXT: out 63, r16 +; CHECK-NEXT: pop r16 +; CHECK-NEXT: reti + call void @foo() + ret void +}