diff --git a/llvm/lib/Target/AVR/AVR.h b/llvm/lib/Target/AVR/AVR.h
--- a/llvm/lib/Target/AVR/AVR.h
+++ b/llvm/lib/Target/AVR/AVR.h
@@ -22,6 +22,7 @@
 class AVRTargetMachine;
 class FunctionPass;
 
+Pass *createAVRShiftExpandPass();
 FunctionPass *createAVRISelDag(AVRTargetMachine &TM,
                                CodeGenOpt::Level OptLevel);
 FunctionPass *createAVRExpandPseudoPass();
@@ -30,6 +31,7 @@
 FunctionPass *createAVRDynAllocaSRPass();
 FunctionPass *createAVRBranchSelectionPass();
 
+void initializeAVRShiftExpandPass(PassRegistry &);
 void initializeAVRExpandPseudoPass(PassRegistry&);
 void initializeAVRRelaxMemPass(PassRegistry&);
 
diff --git a/llvm/lib/Target/AVR/AVRShiftExpand.cpp b/llvm/lib/Target/AVR/AVRShiftExpand.cpp
new file mode 100644
--- /dev/null
+++ b/llvm/lib/Target/AVR/AVRShiftExpand.cpp
@@ -0,0 +1,155 @@
+//===- AVRShiftExpand.cpp - Shift Expansion Pass --------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// Expand 32-bit shift instructions (shl, lshr, ashr) to inline loops, just
+/// like avr-gcc. This must be done in IR because otherwise the type legalizer
+/// will turn 32-bit shifts into (non-existing) library calls such as __ashlsi3.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AVR.h"
+#include "llvm/IR/InstIterator.h"
+#include "llvm/IR/IRBuilder.h"
+
+using namespace llvm;
+
+namespace {
+/// Function pass that expands i32 shifts by an unknown amount into loops.
+class AVRShiftExpand : public FunctionPass {
+public:
+  static char ID;
+  AVRShiftExpand() : FunctionPass(ID) {}
+
+  /// Expands the matching shifts of \p F; returns true if any was expanded.
+  bool runOnFunction(Function &F) override;
+
+  StringRef getPassName() const override { return "AVR Shift Expansion"; }
+
+private:
+  /// Rewrites the single shift \p BI as a loop and erases \p BI.
+  void expand(BinaryOperator *BI);
+};
+} // end anonymous namespace
+
+char AVRShiftExpand::ID = 0;
+// Register the pass under the command-line name "avr-shift-expand".
+INITIALIZE_PASS(AVRShiftExpand, "avr-shift-expand", "AVR Shift Expansion",
+                false, false)
+
+// Factory for the shift expansion pass: every call hands back a freshly
+// allocated AVRShiftExpand instance.
+Pass *llvm::createAVRShiftExpandPass() { return new AVRShiftExpand(); }
+
+/// Finds every i32 shift in \p F whose shift amount is not a ConstantInt and
+/// expands it into a loop.
+///
+/// \returns true if at least one shift was expanded, false if \p F was left
+/// untouched.
+bool AVRShiftExpand::runOnFunction(Function &F) {
+  // Candidates are gathered first and rewritten afterwards, because expand()
+  // removes the shift instruction it is given. Removing instructions while
+  // the walk over the function is still in progress is not a great idea.
+  SmallVector<BinaryOperator *, 1> Worklist;
+
+  for (Instruction &Inst : instructions(F)) {
+    // A candidate has to meet all of the following:
+    //  - it is a shift instruction (shl, lshr, ashr);
+    //  - it produces a plain i32;
+    //  - its shift amount is not known. Known shift amounts are (currently)
+    //    better expanded inline.
+    const bool IsI32Shift = Inst.isShift() && Inst.getType()->isIntegerTy(32);
+    if (IsI32Shift && !isa<ConstantInt>(Inst.getOperand(1)))
+      Worklist.push_back(cast<BinaryOperator>(&Inst));
+  }
+
+  // Without candidates there is nothing to expand to loops, and the function
+  // stays as it is.
+  if (Worklist.empty())
+    return false;
+
+  for (BinaryOperator *Shift : Worklist)
+    expand(Shift);
+
+  // This function was modified.
+  return true;
+}
+
+/// Replaces the i32 shift \p BI by a loop that shifts one bit per iteration.
+///
+/// The block holding \p BI is split in front of it into a head and a tail
+/// ("shift.done"), with a new "shift.loop" block in between. The head skips
+/// the loop when the shift amount, truncated to i8, is zero. All uses of
+/// \p BI are redirected to a PHI node in the tail and \p BI is erased.
+void AVRShiftExpand::expand(BinaryOperator *BI) {
+  if (!BI->isShift())
+    llvm_unreachable("asked to expand an instruction that is not a shift");
+
+  auto &Ctx = BI->getContext();
+  Type *Int32Ty = Type::getInt32Ty(Ctx);
+  Type *Int8Ty = Type::getInt8Ty(Ctx);
+  Value *Int8Zero = ConstantInt::get(Int8Ty, 0);
+
+  // Split the current basic block at the point of the existing shift
+  // instruction and insert a new basic block for the loop.
+  BasicBlock *BB = BI->getParent();
+  Function *F = BB->getParent();
+  BasicBlock *EndBB = BB->splitBasicBlock(BI, "shift.done");
+  BasicBlock *LoopBB = BasicBlock::Create(Ctx, "shift.loop", F, EndBB);
+
+  // Emit the head block's new code in front of the unconditional branch that
+  // splitBasicBlock created. The branch is used as the anchor because the
+  // truncated shift amount is not always an instruction: IRBuilder folds the
+  // trunc to a constant when the amount is undef or a constant expression.
+  Instruction *SplitBr = BB->getTerminator();
+  IRBuilder<> Builder(SplitBr);
+
+  // Truncate the shift amount to i8, which is trivially lowered to a single
+  // AVR register.
+  Value *ShiftAmount = Builder.CreateTrunc(BI->getOperand(1), Int8Ty);
+
+  // Replace the unconditional branch with a conditional branch that bypasses
+  // the loop when there is nothing to shift.
+  Value *Cmp1 = Builder.CreateICmpEQ(ShiftAmount, Int8Zero);
+  Builder.CreateCondBr(Cmp1, EndBB, LoopBB);
+  SplitBr->eraseFromParent();
+
+  // Create the loop body starting with PHI nodes.
+  Builder.SetInsertPoint(LoopBB);
+  PHINode *ShiftAmountPHI = Builder.CreatePHI(Int8Ty, 2);
+  ShiftAmountPHI->addIncoming(ShiftAmount, BB);
+  PHINode *ValuePHI = Builder.CreatePHI(Int32Ty, 2);
+  ValuePHI->addIncoming(BI->getOperand(0), BB);
+
+  // Count the shift amount down by one: one bit is shifted per iteration.
+  Value *ShiftAmountSub = Builder.CreateSub(ShiftAmountPHI,
+                                            ConstantInt::get(Int8Ty, 1));
+  ShiftAmountPHI->addIncoming(ShiftAmountSub, LoopBB);
+
+  // Emit the actual shift, reusing the opcode of the original. Its shift
+  // amount is constant, so it can be emitted inline without a library call.
+  Value *ValueShifted = Builder.CreateBinOp(BI->getOpcode(), ValuePHI,
+                                            ConstantInt::get(Int32Ty, 1));
+  ValuePHI->addIncoming(ValueShifted, LoopBB);
+
+  // Branch to either the loop again (if there is more to shift) or to the
+  // basic block after the loop (if all bits are shifted).
+  Value *Cmp2 = Builder.CreateICmpEQ(ShiftAmountSub, Int8Zero);
+  Builder.CreateCondBr(Cmp2, EndBB, LoopBB);
+
+  // Collect the resulting value. This is necessary in the IR but won't produce
+  // any actual instructions.
+  Builder.SetInsertPoint(BI);
+  PHINode *Result = Builder.CreatePHI(Int32Ty, 2);
+  Result->addIncoming(BI->getOperand(0), BB);
+  Result->addIncoming(ValueShifted, LoopBB);
+
+  // Replace the original shift instruction.
+  BI->replaceAllUsesWith(Result);
+  BI->eraseFromParent();
+}
diff --git a/llvm/lib/Target/AVR/AVRTargetMachine.cpp b/llvm/lib/Target/AVR/AVRTargetMachine.cpp
--- a/llvm/lib/Target/AVR/AVRTargetMachine.cpp
+++ b/llvm/lib/Target/AVR/AVRTargetMachine.cpp
@@ -65,6 +65,7 @@
     return getTM<AVRTargetMachine>();
   }
 
+  void addIRPasses() override;
   bool addInstSelector() override;
   void addPreSched2() override;
   void addPreEmitPass() override;
@@ -76,6 +77,15 @@
   return new AVRPassConfig(*this, PM);
 }
 
+void AVRPassConfig::addIRPasses() {
+  // Before the base-class IR passes are added, turn instructions like
+  //   %result = shl i32 %n, %amount
+  // into a loop so that library calls are avoided.
+  auto *ShiftExpand = createAVRShiftExpandPass();
+  addPass(ShiftExpand);
+  TargetPassConfig::addIRPasses();
+}
+
 extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAVRTarget() {
   // Register the target.
   RegisterTargetMachine<AVRTargetMachine> X(getTheAVRTarget());
@@ -83,6 +93,7 @@
   auto &PR = *PassRegistry::getPassRegistry();
   initializeAVRExpandPseudoPass(PR);
   initializeAVRRelaxMemPass(PR);
+  initializeAVRShiftExpandPass(PR);
 }
 
 const AVRSubtarget *AVRTargetMachine::getSubtargetImpl() const {
diff --git a/llvm/lib/Target/AVR/CMakeLists.txt b/llvm/lib/Target/AVR/CMakeLists.txt
--- a/llvm/lib/Target/AVR/CMakeLists.txt
+++ b/llvm/lib/Target/AVR/CMakeLists.txt
@@ -24,6 +24,7 @@
   AVRMCInstLower.cpp
   AVRRelaxMemOperations.cpp
   AVRRegisterInfo.cpp
+  AVRShiftExpand.cpp
   AVRSubtarget.cpp
   AVRTargetMachine.cpp
   AVRTargetObjectFile.cpp
diff --git a/llvm/test/CodeGen/AVR/shift-expand.ll b/llvm/test/CodeGen/AVR/shift-expand.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AVR/shift-expand.ll
@@ -0,0 +1,81 @@
+; RUN: opt -avr-shift-expand -S %s -o - | FileCheck %s
+
+; The avr-shift-expand pass expands large shifts with a non-constant shift
+; amount to a loop. These loops avoid generating a (non-existing) builtin such
+; as __ashlsi3.
+
+target datalayout = "e-P1-p:16:8-i8:8-i16:8-i32:8-i64:8-f32:8-f64:8-n8-a:8"
+target triple = "avr"
+
+; CHECK-LABEL: @shl
+; CHECK:   %3 = trunc i32 %1 to i8
+; CHECK:   %4 = icmp eq i8 %3, 0
+; CHECK:   br i1 %4, label %shift.done, label %shift.loop
+; CHECK: shift.loop:
+; CHECK:   %5 = phi i8 [ %3, %2 ], [ %7, %shift.loop ]
+; CHECK:   %6 = phi i32 [ %0, %2 ], [ %8, %shift.loop ]
+; CHECK:   %7 = sub i8 %5, 1
+; CHECK:   %8 = shl i32 %6, 1
+; CHECK:   %9 = icmp eq i8 %7, 0
+; CHECK:   br i1 %9, label %shift.done, label %shift.loop
+; CHECK: shift.done:
+; CHECK:   %10 = phi i32 [ %0, %2 ], [ %8, %shift.loop ]
+; CHECK:   ret i32 %10
+define i32 @shl(i32 %0, i32 %1) addrspace(1) {
+  %3 = shl i32 %0, %1
+  ret i32 %3
+}
+
+; CHECK-LABEL: @lshr
+; CHECK:   %3 = trunc i32 %1 to i8
+; CHECK:   %4 = icmp eq i8 %3, 0
+; CHECK:   br i1 %4, label %shift.done, label %shift.loop
+; CHECK: shift.loop:
+; CHECK:   %5 = phi i8 [ %3, %2 ], [ %7, %shift.loop ]
+; CHECK:   %6 = phi i32 [ %0, %2 ], [ %8, %shift.loop ]
+; CHECK:   %7 = sub i8 %5, 1
+; CHECK:   %8 = lshr i32 %6, 1
+; CHECK:   %9 = icmp eq i8 %7, 0
+; CHECK:   br i1 %9, label %shift.done, label %shift.loop
+; CHECK: shift.done:
+; CHECK:   %10 = phi i32 [ %0, %2 ], [ %8, %shift.loop ]
+; CHECK:   ret i32 %10
+define i32 @lshr(i32 %0, i32 %1) addrspace(1) {
+  %3 = lshr i32 %0, %1
+  ret i32 %3
+}
+
+; CHECK-LABEL: @ashr
+; CHECK:   %3 = trunc i32 %1 to i8
+; CHECK:   %4 = icmp eq i8 %3, 0
+; CHECK:   br i1 %4, label %shift.done, label %shift.loop
+; CHECK: shift.loop:
+; CHECK:   %5 = phi i8 [ %3, %2 ], [ %7, %shift.loop ]
+; CHECK:   %6 = phi i32 [ %0, %2 ], [ %8, %shift.loop ]
+; CHECK:   %7 = sub i8 %5, 1
+; CHECK:   %8 = ashr i32 %6, 1
+; CHECK:   %9 = icmp eq i8 %7, 0
+; CHECK:   br i1 %9, label %shift.done, label %shift.loop
+; CHECK: shift.done:
+; CHECK:   %10 = phi i32 [ %0, %2 ], [ %8, %shift.loop ]
+; CHECK:   ret i32 %10
+define i32 @ashr(i32 %0, i32 %1) addrspace(1) {
+  %3 = ashr i32 %0, %1
+  ret i32 %3
+}
+
+; This function is not modified because it is not an i32.
+; CHECK-LABEL: @shl40
+; CHECK: %3 = shl i40 %0, %1
+define i40 @shl40(i40 %0, i40 %1) addrspace(1) {
+  %3 = shl i40 %0, %1
+  ret i40 %3
+}
+
+; This function isn't modified either, although perhaps it should be.
+; CHECK-LABEL: @shl24
+; CHECK: %3 = shl i24 %0, %1
+define i24 @shl24(i24 %0, i24 %1) addrspace(1) {
+  %3 = shl i24 %0, %1
+  ret i24 %3
+}