diff --git a/compiler-rt/lib/builtins/CMakeLists.txt b/compiler-rt/lib/builtins/CMakeLists.txt
--- a/compiler-rt/lib/builtins/CMakeLists.txt
+++ b/compiler-rt/lib/builtins/CMakeLists.txt
@@ -588,6 +588,8 @@
   avr/udivmodhi4.S
   avr/divmodqi4.S
   avr/udivmodqi4.S
+  avr/divmodsi4.S
+  avr/udivmodsi4.S
   ${GENERIC_SOURCES}
 )
 
diff --git a/compiler-rt/lib/builtins/avr/divmodsi4.S b/compiler-rt/lib/builtins/avr/divmodsi4.S
new file mode 100644
--- /dev/null
+++ b/compiler-rt/lib/builtins/avr/divmodsi4.S
@@ -0,0 +1,103 @@
+//===------------- divmodsi4.S - sint32 div & mod -------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// __divmodsi4 has a special ABI as described in gcc/config/avr/avr.md: the two
+// parameters are passed as usual (in r22-r25 and r18-r21). The quotient is
+// returned in r18-r21 and the remainder is returned in r22-r25. Only r26, r27,
+// r30, r31, CC and Rtmp may be clobbered.
+//
+//===----------------------------------------------------------------------===//
+
+  .text
+  .align 2
+
+#if !defined(__AVR_TINY__) // TODO: add support for avrtiny
+
+  .globl __divmodsi4
+  .type  __divmodsi4, @function
+
+; parameter num is stored in r22-r25
+; parameter den is stored in r18-r21
+; quotient is stored in r18-r21
+; remainder is stored in r22-r25
+; temporary registers are r26, r27, r30, r31, rtmp.
+__divmodsi4:
+
+  ; Save the result signs in r31: bit 7 = sign of (num ^ den) (quotient sign),
+  ; bit 6 = sign of num (remainder sign). The other bits are not used.
+  mov r31, r25        ; r31 = most significant byte of num
+  eor r31, r21        ; bit 7 = sign(num) ^ sign(den)
+  bst r25, 7          ; T flag = sign bit of num
+  bld r31, 6          ; bit 6 of r31 = T flag
+
+  ; negate num:
+  ; if (num < 0)
+  ;   num = -num;
+  sbrs r25, 7         ; num negative: skip the jump and negate
+  rjmp 1f             ; num non-negative: leave it unchanged
+  com r25             ; invert the three upper bytes
+  com r24
+  com r23
+  neg r22             ; negate the low byte; C is set unless the result is 0
+  sbci r23, -1        ; add (1 - C): the +1 moves up only past zero bytes
+  sbci r24, -1
+  sbci r25, -1
+1:
+
+  ; negate den:
+  ; if (den < 0)
+  ;   den = -den;
+  sbrs r21, 7         ; den negative: skip the jump and negate
+  rjmp 2f             ; den non-negative: leave it unchanged
+  com r21             ; same 32-bit negation sequence as for num
+  com r20
+  com r19
+  neg r18
+  sbci r19, -1
+  sbci r20, -1
+  sbci r21, -1
+2:
+
+  ; Divide the magnitudes. Note: r31 is preserved by __udivmodsi4.
+#if defined(__AVR_HAVE_JMP_CALL__)
+  call __udivmodsi4   ; quotient in r18-r21, remainder in r22-r25
+#else
+  rcall __udivmodsi4  ; quotient in r18-r21, remainder in r22-r25
+#endif
+
+  ; negate quotient:
+  ; if ((original num < 0) != (original den < 0))
+  ;   quo = -quo;
+  sbrs r31, 7         ; signs differed: skip the jump and negate
+  rjmp 3f             ; signs were equal: quotient stays non-negative
+  com r21             ; 32-bit negation of the quotient in r18-r21
+  com r20
+  com r19
+  neg r18
+  sbci r19, -1
+  sbci r20, -1
+  sbci r21, -1
+3:
+
+  ; negate remainder:
+  ; if (original num < 0)
+  ;   rem = -rem;
+  sbrs r31, 6         ; num was negative: skip the jump and negate
+  rjmp 4f             ; num was non-negative: remainder stays as it is
+  com r25             ; 32-bit negation of the remainder in r22-r25
+  com r24
+  com r23
+  neg r22
+  sbci r23, -1
+  sbci r24, -1
+  sbci r25, -1
+4:
+
+  ret
+
+#endif // !defined(__AVR_TINY__)
diff --git a/compiler-rt/lib/builtins/avr/udivmodsi4.S b/compiler-rt/lib/builtins/avr/udivmodsi4.S
new file mode 100644
--- /dev/null
+++ b/compiler-rt/lib/builtins/avr/udivmodsi4.S
@@ -0,0 +1,124 @@
+//===------------ udivmodsi4.S - uint32 div & mod -------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// The code below corresponds to the following C code, that is tested with over
+// 100000000000 (1e11 or over 2^36) random combinations of numerator and
+// denominator.
+//
+//   uint64_t udivmodsi4(uint32_t num, uint32_t den) {
+//       uint64_t n = num;
+//       for (char i = 32; i != 0; i--) {
+//           n <<= 1;
+//           if ((uint32_t)(n>>32) >= den) {
+//               // This bit of the quotient is one.
+//               n |= 1;
+//               // The next line is complicated in C, but simple in assembly.
+//               // It subtracts den from the upper 32 bits of n.
+//               n = (n & 0xffffffff) | ((uint64_t)((uint32_t)(n >> 32) - den) << 32);
+//           }
+//       }
+//       // Quotient is stored in the lower 32 bits, remainder is stored in the
+//       // upper 32 bits.
+//       return n;
+//   }
+//
+// This is essentially binary long division. The value n actually contains two
+// different values at once, to reduce the number of registers necessary. At the
+// start, it contains just the numerator which is shifted left by one each
+// iteration. The bottom bits are the quotient bits. Each iteration one bit of
+// the quotient is calculated. The denominator is subtracted each iteration from
+// a part of the numerator, so that at the end of the loop the numerator becomes
+// the remainder.
+//
+// __udivmodsi4 has a special ABI as described in gcc/config/avr/avr.md: the two
+// parameters are passed as usual (in r22-r25 and r18-r21). The quotient is
+// returned in r18-r21 and the remainder is returned in r22-r25. Only r26, r27,
+// r30, r31, CC and Rtmp may be clobbered.
+//
+//===----------------------------------------------------------------------===//
+
+  .text
+  .align 2
+
+#if !defined(__AVR_TINY__) // TODO: add support for avrtiny
+
+  .set __tmp_reg__, 0
+  .set __zero_reg__, 1
+
+  .globl __udivmodsi4
+  .type  __udivmodsi4, @function
+
+; parameter num is stored in r22-r25
+; parameter den is stored in r18-r21
+; quotient is stored in r18-r21
+; remainder is stored in r22-r25
+; temporary registers are r26, r27, r30, r31, rtmp.
+; However, __divmodsi4 expects r31 to be preserved so it can't be used here.
+__udivmodsi4:
+  ; n is stored in r22-r25, r26, r27, __tmp_reg__, __zero_reg__ (low to high)
+  ; clear the upper 32 bits of n
+  clr r26
+  clr r27
+  clr __tmp_reg__
+  ;clr __zero_reg__ ; not needed (already zero)
+
+  ; char i = 32
+  ldi r30, 32         ; r30 = loop counter i
+
+1:
+  ; n <<= 1
+  lsl r22             ; bit 0 of n becomes 0 (quotient bit, may be set below)
+  rol r23             ; carry the shifted-out bit through all 8 bytes of n
+  rol r24
+  rol r25
+  rol r26
+  rol r27
+  rol __tmp_reg__
+  rol __zero_reg__
+
+  ; if ((uint32_t)(n>>32) >= den)
+  cp  r26,          r18
+  cpc r27,          r19
+  cpc __tmp_reg__,  r20
+  cpc __zero_reg__, r21
+  brlo 2f             ; upper half < den (unsigned): quotient bit stays 0
+
+  ; n |= 1
+  ori r22, 1          ; this bit of the quotient is one
+
+  ; n = (n & 0xffffffff) | ((uint64_t)((uint32_t)(n >> 32) - den) << 32);
+  ; (in other words: subtract den from the upper bits of n)
+  sub r26,          r18
+  sbc r27,          r19
+  sbc __tmp_reg__,  r20
+  sbc __zero_reg__, r21
+
+2:
+  ; i--
+  dec r30
+  ; if (i != 0) goto start of loop
+  brne 1b
+
+  ; move quotient (lower bits of n) to output registers r18-r21
+  mov r18, r22
+  mov r19, r23
+  mov r20, r24
+  mov r21, r25
+
+  ; move remainder (upper bits of n) to output registers r22-r25
+  mov r22, r26
+  mov r23, r27
+  mov r24, __tmp_reg__
+  mov r25, __zero_reg__
+
+  ; clear zero register after use (it held the top byte of n in the loop)
+  clr __zero_reg__
+
+  ret
+
+#endif // !defined(__AVR_TINY__)