diff --git a/llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp b/llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp
--- a/llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp
+++ b/llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp
@@ -50,11 +50,6 @@
   const AVRRegisterInfo *TRI;
   const TargetInstrInfo *TII;
 
-  /// The register to be used for temporary storage.
-  const Register SCRATCH_REGISTER = AVR::R0;
-  /// The register that will always contain zero.
-  const Register ZERO_REGISTER = AVR::R1;
-
   bool expandMBB(Block &MBB);
   bool expandMI(Block &MBB, BlockIt MBBI);
   template <unsigned OP> bool expand(Block &MBB, BlockIt MBBI);
@@ -442,6 +437,7 @@
 
 template <>
 bool AVRExpandPseudo::expand<AVR::NEGWRd>(Block &MBB, BlockIt MBBI) {
+  const AVRSubtarget &STI = MBB.getParent()->getSubtarget<AVRSubtarget>();
   MachineInstr &MI = *MBBI;
   Register DstLoReg, DstHiReg;
   Register DstReg = MI.getOperand(0).getReg();
@@ -468,7 +464,7 @@
       buildMI(MBB, MBBI, AVR::SBCRdRr)
           .addReg(DstHiReg, RegState::Define | getDeadRegState(DstIsDead))
           .addReg(DstHiReg, getKillRegState(DstIsKill))
-          .addReg(ZERO_REGISTER);
+          .addReg(STI.getZeroRegister());
   if (ImpIsDead)
     MISBCI->getOperand(3).setIsDead();
   // SREG is always implicitly killed
@@ -889,7 +885,7 @@
 
   // Store the SREG.
   buildMI(MBB, MBBI, AVR::INRdA)
-      .addReg(SCRATCH_REGISTER, RegState::Define)
+      .addReg(STI.getTmpRegister(), RegState::Define)
       .addImm(STI.getIORegSREG());
 
   // Disable exceptions.
@@ -900,7 +896,7 @@
   // Restore the status reg.
   buildMI(MBB, MBBI, AVR::OUTARr)
       .addImm(STI.getIORegSREG())
-      .addReg(SCRATCH_REGISTER);
+      .addReg(STI.getTmpRegister());
 
   MI.eraseFromParent();
   return true;
@@ -1325,6 +1321,7 @@
   // multiple registers, but when we actually need to rotate stuff, we have
   // to explicitly add the carry bit.
 
+  const AVRSubtarget &STI = MBB.getParent()->getSubtarget<AVRSubtarget>();
   MachineInstr &MI = *MBBI;
   unsigned OpShift, OpCarry;
   Register DstReg = MI.getOperand(0).getReg();
@@ -1346,7 +1343,7 @@
   auto MIB = buildMI(MBB, MBBI, OpCarry)
                  .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
                  .addReg(DstReg, getKillRegState(DstIsKill))
-                 .addReg(ZERO_REGISTER);
+                 .addReg(STI.getZeroRegister());
 
   MIB->getOperand(3).setIsDead(); // SREG is always dead
   MIB->getOperand(4).setIsKill(); // SREG is always implicitly killed
@@ -2360,7 +2357,7 @@
   TRI->splitReg(SrcReg, SrcLoReg, SrcHiReg);
 
   buildMI(MBB, MBBI, AVR::INRdA)
-      .addReg(AVR::R0, RegState::Define)
+      .addReg(STI.getTmpRegister(), RegState::Define)
       .addImm(STI.getIORegSREG())
       .setMIFlags(Flags);
 
@@ -2373,7 +2370,7 @@
 
   buildMI(MBB, MBBI, AVR::OUTARr)
       .addImm(STI.getIORegSREG())
-      .addReg(AVR::R0, RegState::Kill)
+      .addReg(STI.getTmpRegister(), RegState::Kill)
       .setMIFlags(Flags);
 
   buildMI(MBB, MBBI, AVR::OUTARr)
diff --git a/llvm/lib/Target/AVR/AVRFrameLowering.cpp b/llvm/lib/Target/AVR/AVRFrameLowering.cpp
--- a/llvm/lib/Target/AVR/AVRFrameLowering.cpp
+++ b/llvm/lib/Target/AVR/AVRFrameLowering.cpp
@@ -70,23 +70,26 @@
   // handlers before saving any other registers.
   if (AFI->isInterruptOrSignalHandler()) {
     BuildMI(MBB, MBBI, DL, TII.get(AVR::PUSHRr))
-        .addReg(AVR::R0, RegState::Kill)
+        .addReg(STI.getTmpRegister(), RegState::Kill)
         .setMIFlag(MachineInstr::FrameSetup);
 
-    BuildMI(MBB, MBBI, DL, TII.get(AVR::INRdA), AVR::R0)
+    BuildMI(MBB, MBBI, DL, TII.get(AVR::INRdA), STI.getTmpRegister())
         .addImm(STI.getIORegSREG())
         .setMIFlag(MachineInstr::FrameSetup);
     BuildMI(MBB, MBBI, DL, TII.get(AVR::PUSHRr))
-        .addReg(AVR::R0, RegState::Kill)
+        .addReg(STI.getTmpRegister(), RegState::Kill)
         .setMIFlag(MachineInstr::FrameSetup);
-    if (!MRI.reg_empty(AVR::R1)) {
+    if (!STI.hasTinyEncoding() && !MRI.reg_empty(STI.getZeroRegister())) {
+      // Saving and zeroing the zero register is needed on non-avrtiny chips
+      // because the mul instruction might destroy it. On avrtiny chips, the
+      // fixed zero register is never modified.
       BuildMI(MBB, MBBI, DL, TII.get(AVR::PUSHRr))
-          .addReg(AVR::R1, RegState::Kill)
+          .addReg(STI.getZeroRegister(), RegState::Kill)
           .setMIFlag(MachineInstr::FrameSetup);
       BuildMI(MBB, MBBI, DL, TII.get(AVR::EORRdRr))
-          .addReg(AVR::R1, RegState::Define)
-          .addReg(AVR::R1, RegState::Kill)
-          .addReg(AVR::R1, RegState::Kill)
+          .addReg(STI.getZeroRegister(), RegState::Define)
+          .addReg(STI.getZeroRegister(), RegState::Kill)
+          .addReg(STI.getZeroRegister(), RegState::Kill)
           .setMIFlag(MachineInstr::FrameSetup);
     }
   }
@@ -149,14 +152,15 @@
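-  // Emit special epilogue code to restore R1, R0 and SREG in interrupt/signal
-  // handlers at the very end of the function, just before reti.
+  // Emit special epilogue code to restore the zero and tmp registers and SREG
+  // in interrupt/signal handlers at the very end of the function, before reti.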
   if (AFI->isInterruptOrSignalHandler()) {
-    if (!MRI.reg_empty(AVR::R1)) {
-      BuildMI(MBB, MBBI, DL, TII.get(AVR::POPRd), AVR::R1);
+    if (!STI.hasTinyEncoding() && !MRI.reg_empty(STI.getZeroRegister())) {
+      // See emitPrologue: this is only needed on non-avrtiny chips.
+      BuildMI(MBB, MBBI, DL, TII.get(AVR::POPRd), STI.getZeroRegister());
     }
-    BuildMI(MBB, MBBI, DL, TII.get(AVR::POPRd), AVR::R0);
+    BuildMI(MBB, MBBI, DL, TII.get(AVR::POPRd), STI.getTmpRegister());
     BuildMI(MBB, MBBI, DL, TII.get(AVR::OUTARr))
         .addImm(STI.getIORegSREG())
-        .addReg(AVR::R0, RegState::Kill);
-    BuildMI(MBB, MBBI, DL, TII.get(AVR::POPRd), AVR::R0);
+        .addReg(STI.getTmpRegister(), RegState::Kill);
+    BuildMI(MBB, MBBI, DL, TII.get(AVR::POPRd), STI.getTmpRegister());
   }
 }
 
diff --git a/llvm/lib/Target/AVR/AVRISelLowering.h b/llvm/lib/Target/AVR/AVRISelLowering.h
--- a/llvm/lib/Target/AVR/AVRISelLowering.h
+++ b/llvm/lib/Target/AVR/AVRISelLowering.h
@@ -192,8 +192,8 @@
   MachineBasicBlock *insertWideShift(MachineInstr &MI,
                                      MachineBasicBlock *BB) const;
   MachineBasicBlock *insertMul(MachineInstr &MI, MachineBasicBlock *BB) const;
-  MachineBasicBlock *insertCopyR1(MachineInstr &MI,
-                                  MachineBasicBlock *BB) const;
+  MachineBasicBlock *insertCopyZero(MachineInstr &MI,
+                                    MachineBasicBlock *BB) const;
   MachineBasicBlock *insertAtomicArithmeticOp(MachineInstr &MI,
                                               MachineBasicBlock *BB,
                                               unsigned Opcode, int Width) const;
diff --git a/llvm/lib/Target/AVR/AVRISelLowering.cpp b/llvm/lib/Target/AVR/AVRISelLowering.cpp
--- a/llvm/lib/Target/AVR/AVRISelLowering.cpp
+++ b/llvm/lib/Target/AVR/AVRISelLowering.cpp
@@ -878,12 +878,12 @@
                       MachinePointerInfo(SV));
 }
 
-// Modify the existing ISD::INLINEASM node to add the implicit register r1.
+// Modify the existing ISD::INLINEASM node to add the implicit zero register.
 SDValue AVRTargetLowering::LowerINLINEASM(SDValue Op, SelectionDAG &DAG) const {
-  SDValue R1Reg = DAG.getRegister(AVR::R1, MVT::i8);
-  if (Op.getOperand(Op.getNumOperands() - 1) == R1Reg ||
-      Op.getOperand(Op.getNumOperands() - 2) == R1Reg) {
-    // R1 has already been added. Don't add it again.
+  SDValue ZeroReg = DAG.getRegister(Subtarget.getZeroRegister(), MVT::i8);
+  if (Op.getOperand(Op.getNumOperands() - 1) == ZeroReg ||
+      Op.getOperand(Op.getNumOperands() - 2) == ZeroReg) {
+    // Zero register has already been added. Don't add it again.
     // If this isn't handled, we get called over and over again.
     return Op;
   }
@@ -892,8 +892,8 @@
   // with some edits.
   // Add the following operands at the end (but before the glue node, if it's
   // there):
-  //  - The flags of the implicit R1 register operand.
-  //  - The implicit R1 register operand itself.
+  //  - The flags of the implicit zero register operand.
+  //  - The implicit zero register operand itself.
   SDLoc dl(Op);
   SmallVector<SDValue, 8> Ops;
   SDNode *N = Op.getNode();
@@ -910,13 +910,13 @@
   }
   unsigned Flags = InlineAsm::getFlagWord(InlineAsm::Kind_RegUse, 1);
   Ops.push_back(DAG.getTargetConstant(Flags, dl, MVT::i32));
-  Ops.push_back(R1Reg);
+  Ops.push_back(ZeroReg);
   if (Glue) {
     Ops.push_back(Glue);
   }
 
-  // Replace the current INLINEASM node with a new one that has R1 as implicit
-  // parameter.
+  // Replace the current INLINEASM node with a new one that has the zero
+  // register as implicit parameter.
   SDValue New = DAG.getNode(N->getOpcode(), dl, N->getVTList(), Ops);
   DAG.ReplaceAllUsesOfValueWith(Op, New);
   DAG.ReplaceAllUsesOfValueWith(Op.getValue(1), New.getValue(1));
@@ -1541,9 +1541,9 @@
     Ops.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType()));
   }
 
-  // The R1 register must be passed as an implicit register so that R1 is
-  // correctly zeroed in interrupts.
-  Ops.push_back(DAG.getRegister(AVR::R1, MVT::i8));
+  // The zero register (usually R1) must be passed as an implicit register so
+  // that this register is correctly zeroed in interrupts.
+  Ops.push_back(DAG.getRegister(Subtarget.getZeroRegister(), MVT::i8));
 
   // Add a register mask operand representing the call-preserved registers.
   const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
@@ -1666,11 +1666,11 @@
   const AVRMachineFunctionInfo *AFI = MF.getInfo<AVRMachineFunctionInfo>();
 
   if (!AFI->isInterruptOrSignalHandler()) {
-    // The return instruction has an implicit R1 operand: it must contain zero
-    // on return.
-    // This is not needed in interrupts however, where R1 is handled specially
-    // (only pushed/popped when needed).
-    RetOps.push_back(DAG.getRegister(AVR::R1, MVT::i8));
+    // The return instruction has an implicit zero register operand: it must
+    // contain zero on return.
+    // This is not needed in interrupts however, where the zero register is
+    // handled specially (only pushed/popped when needed).
+    RetOps.push_back(DAG.getRegister(Subtarget.getZeroRegister(), MVT::i8));
   }
 
   unsigned RetOpc =
@@ -1836,6 +1836,7 @@
                                  MutableArrayRef<std::pair<Register, int>> Regs,
                                  int64_t ShiftAmt, bool ArithmeticShift) {
   const TargetInstrInfo &TII = *BB->getParent()->getSubtarget().getInstrInfo();
+  const AVRSubtarget &STI = BB->getParent()->getSubtarget<AVRSubtarget>();
   MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
   DebugLoc dl = MI.getDebugLoc();
 
@@ -1859,7 +1860,8 @@
 
     // Create zero register.
     Register Zero = MRI.createVirtualRegister(&AVR::GPR8RegClass);
-    BuildMI(*BB, MI, dl, TII.get(AVR::COPY), Zero).addReg(AVR::R1);
+    BuildMI(*BB, MI, dl, TII.get(AVR::COPY), Zero)
+        .addReg(STI.getZeroRegister());
 
     // Rotate the least significant bit from the carry bit into a new register
     // (that starts out zero).
@@ -1912,7 +1914,8 @@
     } else {
       // Create a new zero register for zero extending.
       ExtMore = MRI.createVirtualRegister(&AVR::GPR8RegClass);
-      BuildMI(*BB, MI, dl, TII.get(AVR::COPY), ExtMore).addReg(AVR::R1);
+      BuildMI(*BB, MI, dl, TII.get(AVR::COPY), ExtMore)
+          .addReg(STI.getZeroRegister());
       // Rotate most significant bit into a new register (that starts out zero).
       BuildMI(*BB, MI, dl, TII.get(AVR::ADCRdRr), Ext)
           .addReg(ExtMore)
@@ -1956,7 +1959,7 @@
 
     // Zero the least significant register.
     Register Out = MRI.createVirtualRegister(&AVR::GPR8RegClass);
-    BuildMI(*BB, MI, dl, TII.get(AVR::COPY), Out).addReg(AVR::R1);
+    BuildMI(*BB, MI, dl, TII.get(AVR::COPY), Out).addReg(STI.getZeroRegister());
     Regs[Regs.size() - 1] = std::pair(Out, 0);
 
     // Continue shifts with the leftover registers.
@@ -1983,7 +1986,8 @@
             .addReg(Tmp)
             .addReg(Tmp);
       } else {
-        BuildMI(*BB, MI, dl, TII.get(AVR::COPY), ShrExtendReg).addReg(AVR::R1);
+        BuildMI(*BB, MI, dl, TII.get(AVR::COPY), ShrExtendReg)
+            .addReg(STI.getZeroRegister());
       }
     }
     Regs[0] = std::pair(ShrExtendReg, 0);
@@ -2183,14 +2187,15 @@
   return BB;
 }
 
-// Insert a read from R1, which almost always contains the value 0.
+// Insert a read from the zero register.
 MachineBasicBlock *
-AVRTargetLowering::insertCopyR1(MachineInstr &MI, MachineBasicBlock *BB) const {
+AVRTargetLowering::insertCopyZero(MachineInstr &MI,
+                                  MachineBasicBlock *BB) const {
   const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
   MachineBasicBlock::iterator I(MI);
   BuildMI(*BB, I, MI.getDebugLoc(), TII.get(AVR::COPY))
       .add(MI.getOperand(0))
-      .addReg(AVR::R1);
+      .addReg(Subtarget.getZeroRegister());
   MI.eraseFromParent();
   return BB;
 }
@@ -2202,7 +2207,6 @@
   MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
   const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
   MachineBasicBlock::iterator I(MI);
-  const Register SCRATCH_REGISTER = AVR::R0;
   DebugLoc dl = MI.getDebugLoc();
 
   // Example instruction sequence, for an atomic 8-bit add:
@@ -2220,7 +2224,7 @@
   unsigned StoreOpcode = (Width == 8) ? AVR::STPtrRr : AVR::STWPtrRr;
 
   // Disable interrupts.
-  BuildMI(*BB, I, dl, TII.get(AVR::INRdA), SCRATCH_REGISTER)
+  BuildMI(*BB, I, dl, TII.get(AVR::INRdA), Subtarget.getTmpRegister())
       .addImm(Subtarget.getIORegSREG());
   BuildMI(*BB, I, dl, TII.get(AVR::BCLRs)).addImm(7);
 
@@ -2242,7 +2246,7 @@
   // Restore interrupts.
   BuildMI(*BB, I, dl, TII.get(AVR::OUTARr))
       .addImm(Subtarget.getIORegSREG())
-      .addReg(SCRATCH_REGISTER);
+      .addReg(Subtarget.getTmpRegister());
 
   // Remove the pseudo instruction.
   MI.eraseFromParent();
@@ -2275,8 +2279,8 @@
   case AVR::MULRdRr:
   case AVR::MULSRdRr:
     return insertMul(MI, MBB);
-  case AVR::CopyR1:
-    return insertCopyR1(MI, MBB);
+  case AVR::CopyZero:
+    return insertCopyZero(MI, MBB);
   case AVR::AtomicLoadAdd8:
     return insertAtomicArithmeticOp(MI, MBB, AVR::ADDRdRr, 8);
   case AVR::AtomicLoadAdd16:
@@ -2581,7 +2585,8 @@
       break;
-    case 't': // Temporary register: r0.
+    case 't': // Temporary register: r0 (r16 on avrtiny).
       if (VT == MVT::i8)
-        return std::make_pair(unsigned(AVR::R0), &AVR::GPR8RegClass);
+        return std::make_pair(unsigned(Subtarget.getTmpRegister()),
+                              &AVR::GPR8RegClass);
       break;
     case 'w': // Special upper register pairs: r24, r26, r28, r30.
       if (VT == MVT::i8 || VT == MVT::i16)
diff --git a/llvm/lib/Target/AVR/AVRInstrInfo.td b/llvm/lib/Target/AVR/AVRInstrInfo.td
--- a/llvm/lib/Target/AVR/AVRInstrInfo.td
+++ b/llvm/lib/Target/AVR/AVRInstrInfo.td
@@ -918,6 +918,9 @@
   // neg Rd+1
   // neg Rd
   // sbc Rd+1, r1
+  //
+  // Note: on avrtiny this pseudo reads R17 (the zero register there), not the
+  // R1 listed in Uses. This is fine, because R17 is never modified on avrtiny.
   let Uses = [R1] in
   def NEGWRd : Pseudo<(outs DREGS
                        : $rd),
@@ -1990,6 +1993,8 @@
   def ASRWLoRd : Pseudo<(outs DREGS:$rd), (ins DREGS:$src), "asrwlo\t$rd",
                         [(set i16:$rd, (AVRasrlo i16:$src)), (implicit SREG)]>;
 
+  // Note: on avrtiny this pseudo reads R17 (the zero register there), not the
+  // R1 listed in Uses. This is fine, because R17 is never modified on avrtiny.
   let Uses = [R1] in
   def ROLBRd : Pseudo<(outs GPR8
                        : $rd),
@@ -2405,9 +2410,9 @@
                         "# Asr32 PSEUDO",
                         [(set i16:$dstlo, i16:$dsthi, (AVRasrw i16:$srclo, i16:$srchi, i8:$cnt))]>;
 
-// lowered to a copy from R1, which contains the value zero.
+// lowered to a copy from the zero register.
 let usesCustomInserter=1 in
-def CopyR1 : Pseudo<(outs GPR8:$rd), (ins), "clrz\t$rd", [(set i8:$rd, 0)]>;
+def CopyZero : Pseudo<(outs GPR8:$rd), (ins), "clrz\t$rd", [(set i8:$rd, 0)]>;
 
 //===----------------------------------------------------------------------===//
 // Non-Instruction Patterns
diff --git a/llvm/lib/Target/AVR/AVRRegisterInfo.cpp b/llvm/lib/Target/AVR/AVRRegisterInfo.cpp
--- a/llvm/lib/Target/AVR/AVRRegisterInfo.cpp
+++ b/llvm/lib/Target/AVR/AVRRegisterInfo.cpp
@@ -236,7 +236,7 @@
     // a compare and branch, invalidating the contents of SREG set by the
     // compare instruction because of the add/sub pairs. Conservatively save and
     // restore SREG before and after each add/sub pair.
-    BuildMI(MBB, II, dl, TII.get(AVR::INRdA), AVR::R0)
+    BuildMI(MBB, II, dl, TII.get(AVR::INRdA), STI.getTmpRegister())
         .addImm(STI.getIORegSREG());
 
     MachineInstr *New = BuildMI(MBB, II, dl, TII.get(AddOpc), AVR::R29R28)
@@ -247,7 +247,7 @@
     // Restore SREG.
     BuildMI(MBB, std::next(II), dl, TII.get(AVR::OUTARr))
         .addImm(STI.getIORegSREG())
-        .addReg(AVR::R0, RegState::Kill);
+        .addReg(STI.getTmpRegister(), RegState::Kill);
 
     // No need to set SREG as dead here otherwise if the next instruction is a
     // cond branch it will be using a dead register.
diff --git a/llvm/lib/Target/AVR/AVRSubtarget.h b/llvm/lib/Target/AVR/AVRSubtarget.h
--- a/llvm/lib/Target/AVR/AVRSubtarget.h
+++ b/llvm/lib/Target/AVR/AVRSubtarget.h
@@ -21,6 +21,7 @@
 #include "AVRISelLowering.h"
 #include "AVRInstrInfo.h"
 #include "AVRSelectionDAGInfo.h"
+#include "MCTargetDesc/AVRMCTargetDesc.h"
 
 #define GET_SUBTARGETINFO_HEADER
 #include "AVRGenSubtargetInfo.inc"
@@ -102,6 +103,13 @@
   int getRegTmpIndex() const { return hasTinyEncoding() ? 16 : 0; }
   int getRegZeroIndex() const { return hasTinyEncoding() ? 17 : 1; }
 
+  Register getTmpRegister() const {
+    return hasTinyEncoding() ? AVR::R16 : AVR::R0;
+  }
+  Register getZeroRegister() const {
+    return hasTinyEncoding() ? AVR::R17 : AVR::R1;
+  }
+
 private:
   /// The ELF e_flags architecture.
   unsigned ELFArch;
diff --git a/llvm/test/CodeGen/AVR/features/avr-tiny.ll b/llvm/test/CodeGen/AVR/features/avr-tiny.ll
--- a/llvm/test/CodeGen/AVR/features/avr-tiny.ll
+++ b/llvm/test/CodeGen/AVR/features/avr-tiny.ll
@@ -1,9 +1,90 @@
-; RUN: llc -mattr=avrtiny -O0 < %s -march=avr | FileCheck %s
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mattr=avrtiny -O0 < %s -mtriple=avr | FileCheck %s
 
 define i16 @reg_copy16(i16, i16 %a) {
-; CHECK-LABEL: reg_copy16
-; CHECK: mov r24, r22
-; CHECK: mov r25, r23
-
+; CHECK-LABEL: reg_copy16:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    mov r24, r22
+; CHECK-NEXT:    mov r25, r23
+; CHECK-NEXT:    ret
   ret i16 %a
 }
+
+define i8 @return_zero() {
+; CHECK-LABEL: return_zero:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    mov r24, r17
+; CHECK-NEXT:    ret
+  ret i8 0
+}
+
+define i8 @atomic_load8(i8* %foo) {
+; CHECK-LABEL: atomic_load8:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    mov r26, r24
+; CHECK-NEXT:    mov r27, r25
+; CHECK-NEXT:    in r16, 63
+; CHECK-NEXT:    cli
+; CHECK-NEXT:    ld r24, X
+; CHECK-NEXT:    out 63, r16
+; CHECK-NEXT:    ret
+  %val = load atomic i8, i8* %foo unordered, align 1
+  ret i8 %val
+}
+
+define avr_signalcc void @signal_handler_with_asm() {
+; CHECK-LABEL: signal_handler_with_asm:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    push r16
+; CHECK-NEXT:    in r16, 63
+; CHECK-NEXT:    push r16
+; CHECK-NEXT:    push r24
+; CHECK-NEXT:    ldi r24, 3
+; CHECK-NEXT:    ;APP
+; CHECK-NEXT:    mov r24, r24
+; CHECK-NEXT:    ;NO_APP
+; CHECK-NEXT:    pop r24
+; CHECK-NEXT:    pop r16
+; CHECK-NEXT:    out 63, r16
+; CHECK-NEXT:    pop r16
+; CHECK-NEXT:    reti
+  call i8 asm sideeffect "mov $0, $1", "=r,r"(i8 3) nounwind
+  ret void
+}
+
+declare void @foo()
+
+define avr_signalcc void @signal_handler_with_call() {
+; CHECK-LABEL: signal_handler_with_call:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    push r16
+; CHECK-NEXT:    in r16, 63
+; CHECK-NEXT:    push r16
+; CHECK-NEXT:    push r20
+; CHECK-NEXT:    push r21
+; CHECK-NEXT:    push r22
+; CHECK-NEXT:    push r23
+; CHECK-NEXT:    push r24
+; CHECK-NEXT:    push r25
+; CHECK-NEXT:    push r26
+; CHECK-NEXT:    push r27
+; CHECK-NEXT:    push r30
+; CHECK-NEXT:    push r31
+; CHECK-NEXT:    rcall foo
+; CHECK-NEXT:    pop r31
+; CHECK-NEXT:    pop r30
+; CHECK-NEXT:    pop r27
+; CHECK-NEXT:    pop r26
+; CHECK-NEXT:    pop r25
+; CHECK-NEXT:    pop r24
+; CHECK-NEXT:    pop r23
+; CHECK-NEXT:    pop r22
+; CHECK-NEXT:    pop r21
+; CHECK-NEXT:    pop r20
+; CHECK-NEXT:    pop r16
+; CHECK-NEXT:    out 63, r16
+; CHECK-NEXT:    pop r16
+; CHECK-NEXT:    reti
+  call void @foo()
+  ret void
+}