diff --git a/llvm/include/llvm/CodeGen/ExpandLargeDivRem.h b/llvm/include/llvm/CodeGen/ExpandLargeDivRem.h new file mode 100644 --- /dev/null +++ b/llvm/include/llvm/CodeGen/ExpandLargeDivRem.h @@ -0,0 +1,29 @@ +//===----- ExpandLargeDivRem.h - Expand large div/rem ---------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_EXPANDLARGEDIVREM_H +#define LLVM_CODEGEN_EXPANDLARGEDIVREM_H + +#include "llvm/IR/PassManager.h" + +namespace llvm { + +/// Expands div/rem instructions with a bitwidth above a threshold +/// into a call to auto-generated functions. +/// This is useful for backends like x86 that cannot lower divisions +/// with more than 128 bits. +class ExpandLargeDivRemPass : public PassInfoMixin<ExpandLargeDivRemPass> { +public: + PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); + + // The backend asserts when seeing large div/rem instructions. + static bool isRequired() { return true; } +}; +} // end namespace llvm + +#endif // LLVM_CODEGEN_EXPANDLARGEDIVREM_H diff --git a/llvm/include/llvm/CodeGen/MachinePassRegistry.def b/llvm/include/llvm/CodeGen/MachinePassRegistry.def --- a/llvm/include/llvm/CodeGen/MachinePassRegistry.def +++ b/llvm/include/llvm/CodeGen/MachinePassRegistry.def @@ -43,6 +43,7 @@ FUNCTION_PASS("partially-inline-libcalls", PartiallyInlineLibCallsPass, ()) FUNCTION_PASS("ee-instrument", EntryExitInstrumenterPass, (false)) FUNCTION_PASS("post-inline-ee-instrument", EntryExitInstrumenterPass, (true)) +FUNCTION_PASS("expandlargedivrem", ExpandLargeDivRemPass, ()) FUNCTION_PASS("expand-reductions", ExpandReductionsPass, ()) FUNCTION_PASS("expandvp", ExpandVectorPredicationPass, ()) FUNCTION_PASS("lowerinvoke", LowerInvokePass, ()) diff --git a/llvm/include/llvm/CodeGen/Passes.h b/llvm/include/llvm/CodeGen/Passes.h --- a/llvm/include/llvm/CodeGen/Passes.h +++ b/llvm/include/llvm/CodeGen/Passes.h @@ -485,6 +485,9 @@ /// predicate mask. FunctionPass *createExpandVectorPredicationPass(); + // Expands large div/rem instructions. + ModulePass *createExpandLargeDivRemPass(); + + // This pass expands memcmp() to load/stores. 
FunctionPass *createExpandMemCmpPass(); diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h --- a/llvm/include/llvm/InitializePasses.h +++ b/llvm/include/llvm/InitializePasses.h @@ -150,6 +150,7 @@ void initializeEHContGuardCatchretPass(PassRegistry &); void initializeEliminateAvailableExternallyLegacyPassPass(PassRegistry&); void initializeEntryExitInstrumenterPass(PassRegistry&); +void initializeExpandLargeDivRemLegacyPassPass(PassRegistry&); void initializeExpandMemCmpPassPass(PassRegistry&); void initializeExpandPostRAPass(PassRegistry&); void initializeExpandReductionsPass(PassRegistry&); diff --git a/llvm/include/llvm/LinkAllPasses.h b/llvm/include/llvm/LinkAllPasses.h --- a/llvm/include/llvm/LinkAllPasses.h +++ b/llvm/include/llvm/LinkAllPasses.h @@ -196,6 +196,7 @@ (void) llvm::createReversePostOrderFunctionAttrsPass(); (void) llvm::createMergeFunctionsPass(); (void) llvm::createMergeICmpsLegacyPass(); + (void) llvm::createExpandLargeDivRemPass(); (void) llvm::createExpandMemCmpPass(); (void) llvm::createExpandVectorPredicationPass(); std::string buf; diff --git a/llvm/lib/CodeGen/CMakeLists.txt b/llvm/lib/CodeGen/CMakeLists.txt --- a/llvm/lib/CodeGen/CMakeLists.txt +++ b/llvm/lib/CodeGen/CMakeLists.txt @@ -55,6 +55,7 @@ EdgeBundles.cpp EHContGuardCatchret.cpp ExecutionDomainFix.cpp + ExpandLargeDivRem.cpp ExpandMemCmp.cpp ExpandPostRAPseudos.cpp ExpandReductions.cpp diff --git a/llvm/lib/CodeGen/CodeGen.cpp b/llvm/lib/CodeGen/CodeGen.cpp --- a/llvm/lib/CodeGen/CodeGen.cpp +++ b/llvm/lib/CodeGen/CodeGen.cpp @@ -36,6 +36,7 @@ initializeEarlyIfPredicatorPass(Registry); initializeEarlyMachineLICMPass(Registry); initializeEarlyTailDuplicatePass(Registry); + initializeExpandLargeDivRemLegacyPassPass(Registry); initializeExpandMemCmpPassPass(Registry); initializeExpandPostRAPass(Registry); initializeFEntryInserterPass(Registry); diff --git a/llvm/lib/CodeGen/ExpandLargeDivRem.cpp b/llvm/lib/CodeGen/ExpandLargeDivRem.cpp new file mode 100644 --- /dev/null +++ b/llvm/lib/CodeGen/ExpandLargeDivRem.cpp @@ -0,0 +1,296 @@ +//===--- ExpandLargeDivRem.cpp - Expand large div/rem ----------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This pass expands div/rem instructions with a bitwidth above a threshold +// into a call to auto-generated functions. +// This is useful for backends like x86 that cannot lower divisions +// with more than 128 bits. 
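+// +// For example (an illustrative sketch; the tests under +// llvm/test/Transforms/ExpandLargeDivRem show the exact output), an +// instruction such as +//   %res = udiv i129 %a, %b +// is rewritten into +//   %res = call i129 @__llvm_udiv129(i129 %a, i129 %b) +// where @__llvm_udiv129 is emitted into the module with internal linkage.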
+// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/ExpandLargeDivRem.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/Analysis/GlobalsModRef.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/InstIterator.h" +#include "llvm/IR/PassManager.h" +#include "llvm/InitializePasses.h" +#include "llvm/Pass.h" +#include "llvm/Support/CommandLine.h" + +using namespace llvm; + +static cl::opt<unsigned> + ExpandDivRemBits("expand-div-rem-bits", cl::Hidden, cl::init(128), + cl::desc("div and rem instructions on integers with " + "more than <N> bits are expanded.")); + +static Value *getNthBit(IRBuilder<> &Builder, Value *V, Value *N) { + auto *LShr = Builder.CreateLShr(V, N); + return Builder.CreateTrunc(LShr, Type::getInt1Ty(V->getContext())); +} + +static Function *getOrCreateUDivRem(bool IsDiv, Module &M, IntegerType &Ty) { + std::string Name = + (IsDiv ? "__llvm_udiv" : "__llvm_urem") + utostr(Ty.getBitWidth()); + + Function *F = M.getFunction(Name); + if (F) + return F; + + F = Function::Create(FunctionType::get(&Ty, {&Ty, &Ty}, /*isVarArg=*/false), + GlobalVariable::InternalLinkage, + M.getDataLayout().getProgramAddressSpace(), Name); + M.getFunctionList().push_back(F); + + F->addFnAttr(Attribute::NoUnwind); + F->addFnAttr(Attribute::WillReturn); + F->addFnAttr(Attribute::NoRecurse); + F->addFnAttr(Attribute::ReadNone); + // In general, div cannot be 'speculatable' due to UB when dividing by + // zero, but the algorithm used here doesn't produce UB. + F->addFnAttr(Attribute::Speculatable); + + BasicBlock *EntryBB = BasicBlock::Create(M.getContext(), "entry", F); + BasicBlock *LoopBB = BasicBlock::Create(M.getContext(), "loop", F); + BasicBlock *ThenBB = BasicBlock::Create(M.getContext(), "then", F); + BasicBlock *IfEndBB = BasicBlock::Create(M.getContext(), "if.end", F); + BasicBlock *ExitBB = BasicBlock::Create(M.getContext(), "exit", F); + + auto *I32Ty = Type::getInt32Ty(M.getContext()); + + Argument *B = F->getArg(1); + + auto *One = ConstantInt::get(&Ty, 1); + + PHINode *Q = nullptr; + Value *NewQ = nullptr; + PHINode *QNext = nullptr; + + // entry: + IRBuilder<> Builder(EntryBB); + Builder.CreateBr(LoopBB); + + // loop: + Builder.SetInsertPoint(LoopBB); + + auto *I = Builder.CreatePHI(I32Ty, 3, "i"); + I->addIncoming(ConstantInt::get(I32Ty, Ty.getBitWidth() - 1), EntryBB); + + if (IsDiv) { + Q = Builder.CreatePHI(&Ty, 2, "q"); + Q->addIncoming(ConstantInt::get(&Ty, 0), EntryBB); + } + + auto *R = Builder.CreatePHI(&Ty, 2, "r"); + R->addIncoming(ConstantInt::get(&Ty, 0), EntryBB); + + auto *IExt = Builder.CreateZExtOrTrunc(I, &Ty, "iext"); + + // A_nth = (A >> i) & 1 + Value *An = getNthBit(Builder, F->getArg(0), IExt); + + // R = R << 1 + auto *NewR = Builder.CreateShl(R, One, "new_r"); + + // R = R | A_nth + NewR = Builder.CreateOr(NewR, Builder.CreateZExt(An, &Ty), "new_r"); + + auto *ExitLoopCond = + Builder.CreateICmpEQ(I, ConstantInt::get(I32Ty, 0), "loop_exit_cond"); + + // I-- + auto *NewI = Builder.CreateAdd(I, ConstantInt::getSigned(I32Ty, -1), "new_i"); + I->addIncoming(NewI, IfEndBB); + + auto *RGreater = Builder.CreateICmpUGE(NewR, B); + // if (R >= B) + Builder.CreateCondBr(RGreater, ThenBB, IfEndBB); + + // then: + Builder.SetInsertPoint(ThenBB); + + // R = R - B + auto *NewR2 = Builder.CreateSub(NewR, B, "new_r"); + + if (IsDiv) { + // Q = Q | (1 << I) + NewQ = Builder.CreateOr(Q, Builder.CreateShl(One, IExt), "new_q"); + } + 
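// Taken together, the loop/then/if.end blocks implement restoring + // ("shift-subtract") long division; the generated body corresponds to + // this pseudocode sketch (comment only): + //   for (i = BitWidth - 1; ; --i) { + //     r = (r << 1) | ((a >> i) & 1); + //     if (r >= b) { r -= b; if (IsDiv) q |= (1 << i); } + //     if (i == 0) break; + //   } + 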
Builder.CreateBr(IfEndBB); + + // if.end: + Builder.SetInsertPoint(IfEndBB); + + // Merge the values coming from the then block and from the loop header. + auto *RNext = Builder.CreatePHI(&Ty, 2, "r"); + RNext->addIncoming(NewR2, ThenBB); + RNext->addIncoming(NewR, LoopBB); + R->addIncoming(RNext, IfEndBB); + + if (IsDiv) { + QNext = Builder.CreatePHI(&Ty, 2, "q"); + QNext->addIncoming(NewQ, ThenBB); + QNext->addIncoming(Q, LoopBB); + Q->addIncoming(QNext, IfEndBB); + } + + // if (i == 0) break; + Builder.CreateCondBr(ExitLoopCond, ExitBB, LoopBB); + + // exit: + Builder.SetInsertPoint(ExitBB); + if (IsDiv) { + // return Q + Builder.CreateRet(QNext); + } else { + // return R + Builder.CreateRet(RNext); + } + return F; +} + +static Function *getOrCreateSDivRem(bool IsDiv, Module &M, IntegerType &Ty) { + std::string Name = + (IsDiv ? "__llvm_sdiv" : "__llvm_srem") + utostr(Ty.getBitWidth()); + + Function *F = M.getFunction(Name); + if (F) + return F; + + F = Function::Create(FunctionType::get(&Ty, {&Ty, &Ty}, /*isVarArg=*/false), + GlobalVariable::InternalLinkage, + M.getDataLayout().getProgramAddressSpace(), Name); + M.getFunctionList().push_back(F); + + Function *UDivRemFn = getOrCreateUDivRem(IsDiv, M, Ty); + F->setAttributes(UDivRemFn->getAttributes()); + + Value *A = F->getArg(0); + Value *B = F->getArg(1); + auto *Zero = ConstantInt::get(&Ty, 0); + + // entry: + IRBuilder<> Builder(BasicBlock::Create(M.getContext(), "entry", F)); + + // A = A < 0 ? -A : A + auto *ANeg = Builder.CreateICmpSLT(A, Zero); + A = Builder.CreateSelect(ANeg, Builder.CreateNeg(A), A); + + // B = B < 0 ? -B : B + auto *BNeg = Builder.CreateICmpSLT(B, Zero); + B = Builder.CreateSelect(BNeg, Builder.CreateNeg(B), B); + + auto *Call = Builder.CreateCall(UDivRemFn, {A, B}); + + // Quo = ANeg != BNeg ? -Quo : Quo + // Rem = ANeg ? -Rem : Rem + auto *NegateResultCond = IsDiv ? 
Builder.CreateICmpNE(ANeg, BNeg) : ANeg; + + auto *Ret = + Builder.CreateSelect(NegateResultCond, Builder.CreateNeg(Call), Call); + + Builder.CreateRet(Ret); + + return F; +} + +static Function *getOrCreateDivRem(unsigned Opcode, Module &M, + IntegerType &Ty) { + + bool IsDiv = Opcode == Instruction::UDiv || Opcode == Instruction::SDiv; + + if (Opcode == Instruction::SDiv || Opcode == Instruction::SRem) + return getOrCreateSDivRem(IsDiv, M, Ty); + + return getOrCreateUDivRem(IsDiv, M, Ty); +} + +static bool runImpl(Function &F) { + SmallVector<Instruction *, 4> Replace; + + for (auto &I : instructions(F)) { + switch (I.getOpcode()) { + case Instruction::UDiv: + case Instruction::SDiv: + case Instruction::URem: + case Instruction::SRem: + if (!isa<IntegerType>(I.getType()) || + I.getType()->getIntegerBitWidth() <= ExpandDivRemBits) + continue; + Replace.push_back(&I); + break; + default: + break; + } + } + + if (Replace.empty()) + return false; + + while (!Replace.empty()) { + Instruction *I = Replace.pop_back_val(); + + IRBuilder<> Builder(I); + auto *Ty = cast<IntegerType>(I->getType()); + + I->replaceAllUsesWith(Builder.CreateCall( + getOrCreateDivRem(I->getOpcode(), *F.getParent(), *Ty), + {I->getOperand(0), I->getOperand(1)})); + I->eraseFromParent(); + } + + return true; +} + +PreservedAnalyses ExpandLargeDivRemPass::run(Module &M, + ModuleAnalysisManager &AM) { + + bool Changed = false; + for (auto &F : M) + Changed |= runImpl(F); + + if (Changed) + return PreservedAnalyses::none(); + + return PreservedAnalyses::all(); +} + +class ExpandLargeDivRemLegacyPass : public ModulePass { +public: + static char ID; + + ExpandLargeDivRemLegacyPass() : ModulePass(ID) { + initializeExpandLargeDivRemLegacyPassPass(*PassRegistry::getPassRegistry()); + } + + bool runOnModule(Module &M) override { + bool Changed = false; + for (auto &F : M) + Changed |= runImpl(F); + return Changed; + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addPreserved<AAResultsWrapperPass>(); + AU.addPreserved<GlobalsAAWrapperPass>(); + } +}; + +char ExpandLargeDivRemLegacyPass::ID = 0; +INITIALIZE_PASS_BEGIN(ExpandLargeDivRemLegacyPass, "expandlargedivrem", + "Expand large div/rem", false, false) +INITIALIZE_PASS_END(ExpandLargeDivRemLegacyPass, "expandlargedivrem", + "Expand large div/rem", false, false) + +ModulePass *llvm::createExpandLargeDivRemPass() { + return new ExpandLargeDivRemLegacyPass(); +} diff --git a/llvm/lib/CodeGen/TargetPassConfig.cpp b/llvm/lib/CodeGen/TargetPassConfig.cpp --- a/llvm/lib/CodeGen/TargetPassConfig.cpp +++ b/llvm/lib/CodeGen/TargetPassConfig.cpp @@ -1113,6 +1113,7 @@ if (TM->useEmulatedTLS()) addPass(createLowerEmuTLSPass()); + addPass(createExpandLargeDivRemPass()); addPass(createPreISelIntrinsicLoweringPass()); PM->add(createTargetTransformInfoWrapperPass(TM->getTargetIRAnalysis())); addIRPasses(); diff --git a/llvm/test/CodeGen/AArch64/O0-pipeline.ll b/llvm/test/CodeGen/AArch64/O0-pipeline.ll --- a/llvm/test/CodeGen/AArch64/O0-pipeline.ll +++ b/llvm/test/CodeGen/AArch64/O0-pipeline.ll @@ -13,6 +13,7 @@ ; CHECK-NEXT: Profile summary info ; CHECK-NEXT: Machine Branch Probability Analysis ; CHECK-NEXT: ModulePass Manager +; CHECK-NEXT: Expand large div/rem ; CHECK-NEXT: Pre-ISel Intrinsic Lowering ; CHECK-NEXT: FunctionPass Manager ; CHECK-NEXT: Expand Atomic instructions diff --git a/llvm/test/CodeGen/AArch64/O3-pipeline.ll b/llvm/test/CodeGen/AArch64/O3-pipeline.ll --- a/llvm/test/CodeGen/AArch64/O3-pipeline.ll +++ b/llvm/test/CodeGen/AArch64/O3-pipeline.ll @@ -16,6 +16,7 @@ ; CHECK-NEXT: Machine Branch Probability Analysis ; CHECK-NEXT: Default 
Regalloc Eviction Advisor ; CHECK-NEXT: ModulePass Manager +; CHECK-NEXT: Expand large div/rem ; CHECK-NEXT: Pre-ISel Intrinsic Lowering ; CHECK-NEXT: FunctionPass Manager ; CHECK-NEXT: Expand Atomic instructions diff --git a/llvm/test/CodeGen/AArch64/udivmodei5.ll b/llvm/test/CodeGen/AArch64/udivmodei5.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/udivmodei5.ll @@ -0,0 +1,133 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=aarch64-linux-gnuabi < %s | FileCheck %s +; RUN: llc -mtriple=aarch64_be-linux-gnuabi < %s | FileCheck %s --check-prefix=CHECK-BE + +define void @udiv129(i129* %ptr, i129* %out) nounwind { +; CHECK-LABEL: udiv129: +; CHECK: // %bb.0: +; CHECK-NEXT: stp x30, x19, [sp, #-16]! // 16-byte Folded Spill +; CHECK-NEXT: mov x19, x1 +; CHECK-NEXT: ldrb w2, [x0, #16] +; CHECK-NEXT: ldp x8, x1, [x0] +; CHECK-NEXT: mov w4, #3 +; CHECK-NEXT: mov x5, xzr +; CHECK-NEXT: mov x6, xzr +; CHECK-NEXT: mov x0, x8 +; CHECK-NEXT: bl __llvm_udiv129 +; CHECK-NEXT: and w8, w2, #0x1 +; CHECK-NEXT: stp x0, x1, [x19] +; CHECK-NEXT: strb w8, [x19, #16] +; CHECK-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload +; CHECK-NEXT: ret +; +; CHECK-BE-LABEL: udiv129: +; CHECK-BE: // %bb.0: +; CHECK-BE-NEXT: stp x30, x19, [sp, #-16]! // 16-byte Folded Spill +; CHECK-BE-NEXT: ldp x9, x8, [x0] +; CHECK-BE-NEXT: mov x19, x1 +; CHECK-BE-NEXT: mov x4, xzr +; CHECK-BE-NEXT: ldrb w2, [x0, #16] +; CHECK-BE-NEXT: mov x5, xzr +; CHECK-BE-NEXT: mov w6, #3 +; CHECK-BE-NEXT: lsr x0, x9, #56 +; CHECK-BE-NEXT: extr x1, x9, x8, #56 +; CHECK-BE-NEXT: bfi x2, x8, #8, #56 +; CHECK-BE-NEXT: bl __llvm_udiv129 +; CHECK-BE-NEXT: extr x8, x0, x1, #8 +; CHECK-BE-NEXT: extr x9, x1, x2, #8 +; CHECK-BE-NEXT: strb w2, [x19, #16] +; CHECK-BE-NEXT: and x8, x8, #0x1ffffffffffffff +; CHECK-BE-NEXT: stp x8, x9, [x19] +; CHECK-BE-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload +; CHECK-BE-NEXT: ret + %a = load i129, i129* %ptr + %res = udiv i129 %a, 3 + store i129 %res, i129* %out + ret void +} + +define i129 @urem129(i129 %a, i129 %b) nounwind { +; CHECK-LABEL: urem129: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: bl __llvm_urem129 +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret +; +; CHECK-BE-LABEL: urem129: +; CHECK-BE: // %bb.0: +; CHECK-BE-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-BE-NEXT: bl __llvm_urem129 +; CHECK-BE-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-BE-NEXT: ret + %res = urem i129 %a, %b + ret i129 %res +} + +define i129 @sdiv129(i129 %a, i129 %b) nounwind { +; CHECK-LABEL: sdiv129: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: bl __llvm_sdiv129 +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret +; +; CHECK-BE-LABEL: sdiv129: +; CHECK-BE: // %bb.0: +; CHECK-BE-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-BE-NEXT: bl __llvm_sdiv129 +; CHECK-BE-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-BE-NEXT: ret + %res = sdiv i129 %a, %b + ret i129 %res +} + +define i129 @srem129(i129 %a, i129 %b) nounwind { +; CHECK-LABEL: srem129: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: bl __llvm_srem129 +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret +; +; CHECK-BE-LABEL: srem129: +; CHECK-BE: // %bb.0: +; CHECK-BE-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-BE-NEXT: bl __llvm_srem129 +; CHECK-BE-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-BE-NEXT: ret + %res = srem i129 %a, %b + ret i129 %res +} + +; Some higher sizes +define i257 @sdiv257(i257 %a, i257 %b) nounwind { +; CHECK-LABEL: sdiv257: +; CHECK: // %bb.0: +; CHECK-NEXT: sub sp, sp, #48 +; CHECK-NEXT: ldr q0, [sp, #48] +; CHECK-NEXT: str x30, [sp, #32] // 8-byte Folded Spill +; CHECK-NEXT: ldr x8, [sp, #64] +; CHECK-NEXT: str q0, [sp] +; CHECK-NEXT: str x8, [sp, #16] +; CHECK-NEXT: bl __llvm_sdiv257 +; CHECK-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; CHECK-NEXT: add sp, sp, #48 +; CHECK-NEXT: ret +; +; CHECK-BE-LABEL: sdiv257: +; CHECK-BE: // %bb.0: +; CHECK-BE-NEXT: sub sp, sp, #48 +; CHECK-BE-NEXT: add x8, sp, #48 +; CHECK-BE-NEXT: str x30, [sp, #32] // 8-byte Folded Spill +; CHECK-BE-NEXT: ld1 { v0.2d }, [x8] +; CHECK-BE-NEXT: ldr x8, [sp, #64] +; CHECK-BE-NEXT: str x8, [sp, #16] +; CHECK-BE-NEXT: st1 { v0.2d }, [sp] +; CHECK-BE-NEXT: bl __llvm_sdiv257 +; CHECK-BE-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; CHECK-BE-NEXT: add sp, sp, #48 +; CHECK-BE-NEXT: ret + %res = sdiv i257 %a, %b + ret i257 %res +} diff --git a/llvm/test/CodeGen/ARM/O3-pipeline.ll b/llvm/test/CodeGen/ARM/O3-pipeline.ll --- a/llvm/test/CodeGen/ARM/O3-pipeline.ll +++ b/llvm/test/CodeGen/ARM/O3-pipeline.ll @@ -3,6 +3,7 @@ ; REQUIRES: asserts ; CHECK: ModulePass Manager +; CHECK-NEXT: Expand large div/rem ; CHECK-NEXT: Pre-ISel Intrinsic Lowering ; CHECK-NEXT: FunctionPass Manager ; CHECK-NEXT: Expand Atomic instructions diff --git a/llvm/test/CodeGen/X86/O0-pipeline.ll b/llvm/test/CodeGen/X86/O0-pipeline.ll --- a/llvm/test/CodeGen/X86/O0-pipeline.ll +++ b/llvm/test/CodeGen/X86/O0-pipeline.ll @@ -15,6 +15,7 @@ ; CHECK-NEXT: Profile summary info ; CHECK-NEXT: Machine Branch Probability Analysis ; CHECK-NEXT: ModulePass Manager +; CHECK-NEXT: Expand large div/rem ; CHECK-NEXT: Pre-ISel Intrinsic Lowering ; CHECK-NEXT: FunctionPass Manager ; CHECK-NEXT: Expand Atomic instructions diff --git a/llvm/test/CodeGen/X86/opt-pipeline.ll b/llvm/test/CodeGen/X86/opt-pipeline.ll --- a/llvm/test/CodeGen/X86/opt-pipeline.ll +++ b/llvm/test/CodeGen/X86/opt-pipeline.ll @@ -24,6 +24,7 @@ ; CHECK-NEXT: Machine Branch Probability Analysis ; CHECK-NEXT: Default Regalloc Eviction Advisor ; CHECK-NEXT: ModulePass Manager +; CHECK-NEXT: Expand large div/rem ; CHECK-NEXT: Pre-ISel Intrinsic Lowering ; CHECK-NEXT: FunctionPass Manager ; CHECK-NEXT: Expand Atomic instructions diff --git a/llvm/test/CodeGen/X86/udivmodei5.ll b/llvm/test/CodeGen/X86/udivmodei5.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/X86/udivmodei5.ll @@ -0,0 +1,693 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=i686-unknown-unknown | FileCheck %s --check-prefix=X86 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefix=X64 + +define i129 @udiv129(i129 %a, i129 %b) nounwind { +; X86-LABEL: udiv129: +; X86: # %bb.0: +; X86-NEXT: pushl %ebp +; X86-NEXT: movl %esp, %ebp +; X86-NEXT: pushl %ebx +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: andl $-8, %esp +; X86-NEXT: subl $24, %esp +; X86-NEXT: movl 8(%ebp), %esi +; X86-NEXT: movl %esp, %eax +; X86-NEXT: pushl 48(%ebp) +; X86-NEXT: pushl 44(%ebp) +; X86-NEXT: pushl 40(%ebp) +; X86-NEXT: pushl 36(%ebp) +; X86-NEXT: pushl 32(%ebp) +; X86-NEXT: pushl 28(%ebp) +; X86-NEXT: pushl 24(%ebp) +; X86-NEXT: pushl 20(%ebp) +; X86-NEXT: pushl 16(%ebp) +; X86-NEXT: 
pushl 12(%ebp) +; X86-NEXT: pushl %eax +; X86-NEXT: calll __llvm_udiv129 +; X86-NEXT: addl $40, %esp +; X86-NEXT: movl (%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NEXT: movb {{[0-9]+}}(%esp), %bl +; X86-NEXT: movb %bl, 16(%esi) +; X86-NEXT: movl %edi, 8(%esi) +; X86-NEXT: movl %edx, 12(%esi) +; X86-NEXT: movl %eax, (%esi) +; X86-NEXT: movl %ecx, 4(%esi) +; X86-NEXT: movl %esi, %eax +; X86-NEXT: leal -12(%ebp), %esp +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: popl %ebx +; X86-NEXT: popl %ebp +; X86-NEXT: retl $4 +; +; X64-LABEL: udiv129: +; X64: # %bb.0: +; X64-NEXT: pushq %rax +; X64-NEXT: callq __llvm_udiv129 +; X64-NEXT: popq %rsi +; X64-NEXT: retq + %res = udiv i129 %a, %b + ret i129 %res +} + +define i129 @urem129(i129 %a, i129 %b) nounwind { +; X86-LABEL: urem129: +; X86: # %bb.0: +; X86-NEXT: pushl %ebp +; X86-NEXT: movl %esp, %ebp +; X86-NEXT: pushl %ebx +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: andl $-8, %esp +; X86-NEXT: subl $24, %esp +; X86-NEXT: movl 8(%ebp), %esi +; X86-NEXT: movl %esp, %eax +; X86-NEXT: pushl 48(%ebp) +; X86-NEXT: pushl 44(%ebp) +; X86-NEXT: pushl 40(%ebp) +; X86-NEXT: pushl 36(%ebp) +; X86-NEXT: pushl 32(%ebp) +; X86-NEXT: pushl 28(%ebp) +; X86-NEXT: pushl 24(%ebp) +; X86-NEXT: pushl 20(%ebp) +; X86-NEXT: pushl 16(%ebp) +; X86-NEXT: pushl 12(%ebp) +; X86-NEXT: pushl %eax +; X86-NEXT: calll __llvm_urem129 +; X86-NEXT: addl $40, %esp +; X86-NEXT: movl (%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NEXT: movb {{[0-9]+}}(%esp), %bl +; X86-NEXT: movb %bl, 16(%esi) +; X86-NEXT: movl %edi, 8(%esi) +; X86-NEXT: movl %edx, 12(%esi) +; X86-NEXT: movl %eax, (%esi) +; X86-NEXT: movl %ecx, 4(%esi) +; X86-NEXT: movl %esi, %eax +; X86-NEXT: leal -12(%ebp), %esp +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: popl %ebx +; X86-NEXT: popl %ebp +; X86-NEXT: retl $4 +; +; X64-LABEL: urem129: +; X64: # %bb.0: +; X64-NEXT: pushq %rax +; X64-NEXT: callq __llvm_urem129 +; X64-NEXT: popq %rsi +; X64-NEXT: retq + %res = urem i129 %a, %b + ret i129 %res +} + +define i129 @sdiv129(i129 %a, i129 %b) nounwind { +; X86-LABEL: sdiv129: +; X86: # %bb.0: +; X86-NEXT: pushl %ebp +; X86-NEXT: movl %esp, %ebp +; X86-NEXT: pushl %ebx +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: andl $-8, %esp +; X86-NEXT: subl $24, %esp +; X86-NEXT: movl 8(%ebp), %esi +; X86-NEXT: movl %esp, %eax +; X86-NEXT: pushl 48(%ebp) +; X86-NEXT: pushl 44(%ebp) +; X86-NEXT: pushl 40(%ebp) +; X86-NEXT: pushl 36(%ebp) +; X86-NEXT: pushl 32(%ebp) +; X86-NEXT: pushl 28(%ebp) +; X86-NEXT: pushl 24(%ebp) +; X86-NEXT: pushl 20(%ebp) +; X86-NEXT: pushl 16(%ebp) +; X86-NEXT: pushl 12(%ebp) +; X86-NEXT: pushl %eax +; X86-NEXT: calll __llvm_sdiv129 +; X86-NEXT: addl $40, %esp +; X86-NEXT: movl (%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NEXT: movb {{[0-9]+}}(%esp), %bl +; X86-NEXT: movb %bl, 16(%esi) +; X86-NEXT: movl %edi, 8(%esi) +; X86-NEXT: movl %edx, 12(%esi) +; X86-NEXT: movl %eax, (%esi) +; X86-NEXT: movl %ecx, 4(%esi) +; X86-NEXT: movl %esi, %eax +; X86-NEXT: leal -12(%ebp), %esp +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: popl %ebx +; X86-NEXT: popl %ebp +; X86-NEXT: retl $4 +; +; X64-LABEL: sdiv129: +; X64: # %bb.0: +; X64-NEXT: pushq %rax +; X64-NEXT: callq 
__llvm_sdiv129 +; X64-NEXT: popq %rsi +; X64-NEXT: retq + %res = sdiv i129 %a, %b + ret i129 %res +} + +define i129 @srem129(i129 %a, i129 %b) nounwind { +; X86-LABEL: srem129: +; X86: # %bb.0: +; X86-NEXT: pushl %ebp +; X86-NEXT: movl %esp, %ebp +; X86-NEXT: pushl %ebx +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: andl $-8, %esp +; X86-NEXT: subl $24, %esp +; X86-NEXT: movl 8(%ebp), %esi +; X86-NEXT: movl %esp, %eax +; X86-NEXT: pushl 48(%ebp) +; X86-NEXT: pushl 44(%ebp) +; X86-NEXT: pushl 40(%ebp) +; X86-NEXT: pushl 36(%ebp) +; X86-NEXT: pushl 32(%ebp) +; X86-NEXT: pushl 28(%ebp) +; X86-NEXT: pushl 24(%ebp) +; X86-NEXT: pushl 20(%ebp) +; X86-NEXT: pushl 16(%ebp) +; X86-NEXT: pushl 12(%ebp) +; X86-NEXT: pushl %eax +; X86-NEXT: calll __llvm_srem129 +; X86-NEXT: addl $40, %esp +; X86-NEXT: movl (%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NEXT: movb {{[0-9]+}}(%esp), %bl +; X86-NEXT: movb %bl, 16(%esi) +; X86-NEXT: movl %edi, 8(%esi) +; X86-NEXT: movl %edx, 12(%esi) +; X86-NEXT: movl %eax, (%esi) +; X86-NEXT: movl %ecx, 4(%esi) +; X86-NEXT: movl %esi, %eax +; X86-NEXT: leal -12(%ebp), %esp +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: popl %ebx +; X86-NEXT: popl %ebp +; X86-NEXT: retl $4 +; +; X64-LABEL: srem129: +; X64: # %bb.0: +; X64-NEXT: pushq %rax +; X64-NEXT: callq __llvm_srem129 +; X64-NEXT: popq %rsi +; X64-NEXT: retq + %res = srem i129 %a, %b + ret i129 %res +} + +; Some higher sizes +define i257 @sdiv257(i257 %a, i257 %b) nounwind { +; X86-LABEL: sdiv257: +; X86: # %bb.0: +; X86-NEXT: pushl %ebp +; X86-NEXT: movl %esp, %ebp +; X86-NEXT: pushl %ebx +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: andl $-8, %esp +; X86-NEXT: subl $56, %esp +; X86-NEXT: movl 8(%ebp), %esi +; X86-NEXT: leal {{[0-9]+}}(%esp), %eax +; X86-NEXT: pushl 80(%ebp) +; X86-NEXT: pushl 76(%ebp) +; X86-NEXT: pushl 72(%ebp) +; X86-NEXT: pushl 68(%ebp) +; X86-NEXT: pushl 64(%ebp) +; X86-NEXT: pushl 60(%ebp) +; X86-NEXT: pushl 56(%ebp) +; X86-NEXT: pushl 52(%ebp) +; X86-NEXT: pushl 48(%ebp) +; X86-NEXT: pushl 44(%ebp) +; X86-NEXT: pushl 40(%ebp) +; X86-NEXT: pushl 36(%ebp) +; X86-NEXT: pushl 32(%ebp) +; X86-NEXT: pushl 28(%ebp) +; X86-NEXT: pushl 24(%ebp) +; X86-NEXT: pushl 20(%ebp) +; X86-NEXT: pushl 16(%ebp) +; X86-NEXT: pushl 12(%ebp) +; X86-NEXT: pushl %eax +; X86-NEXT: calll __llvm_sdiv257 +; X86-NEXT: addl $72, %esp +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %eax, (%esp) # 4-byte Spill +; X86-NEXT: movb {{[0-9]+}}(%esp), %bl +; X86-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb %bl, 32(%esi) +; X86-NEXT: movl %eax, 24(%esi) +; X86-NEXT: movl %ecx, 28(%esi) +; X86-NEXT: movl %edx, 16(%esi) +; X86-NEXT: movl %edi, 20(%esi) +; X86-NEXT: movl (%esp), %eax # 4-byte Reload +; X86-NEXT: movl %eax, 8(%esi) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: movl %eax, 12(%esi) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: movl %eax, (%esi) +; X86-NEXT: movl 
{{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: movl %eax, 4(%esi) +; X86-NEXT: movl %esi, %eax +; X86-NEXT: leal -12(%ebp), %esp +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: popl %ebx +; X86-NEXT: popl %ebp +; X86-NEXT: retl $4 +; +; X64-LABEL: sdiv257: +; X64: # %bb.0: +; X64-NEXT: pushq %rbx +; X64-NEXT: subq $48, %rsp +; X64-NEXT: movq %rdi, %rbx +; X64-NEXT: subq $8, %rsp +; X64-NEXT: leaq {{[0-9]+}}(%rsp), %rdi +; X64-NEXT: pushq {{[0-9]+}}(%rsp) +; X64-NEXT: pushq {{[0-9]+}}(%rsp) +; X64-NEXT: pushq {{[0-9]+}}(%rsp) +; X64-NEXT: pushq {{[0-9]+}}(%rsp) +; X64-NEXT: pushq {{[0-9]+}}(%rsp) +; X64-NEXT: callq __llvm_sdiv257 +; X64-NEXT: addq $48, %rsp +; X64-NEXT: movq {{[0-9]+}}(%rsp), %rdi +; X64-NEXT: movq {{[0-9]+}}(%rsp), %rcx +; X64-NEXT: movq {{[0-9]+}}(%rsp), %rdx +; X64-NEXT: movq {{[0-9]+}}(%rsp), %rsi +; X64-NEXT: movb {{[0-9]+}}(%rsp), %al +; X64-NEXT: movb %al, 32(%rbx) +; X64-NEXT: movq %rsi, 16(%rbx) +; X64-NEXT: movq %rdx, 24(%rbx) +; X64-NEXT: movq %rdi, (%rbx) +; X64-NEXT: movq %rcx, 8(%rbx) +; X64-NEXT: movq %rbx, %rax +; X64-NEXT: addq $48, %rsp +; X64-NEXT: popq %rbx +; X64-NEXT: retq + %res = sdiv i257 %a, %b + ret i257 %res +} + +define i1001 @srem1001(i1001 %a, i1001 %b) nounwind { +; X86-LABEL: srem1001: +; X86: # %bb.0: +; X86-NEXT: pushl %ebp +; X86-NEXT: movl %esp, %ebp +; X86-NEXT: pushl %ebx +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: andl $-8, %esp +; X86-NEXT: subl $248, %esp +; X86-NEXT: movl 8(%ebp), %esi +; X86-NEXT: leal {{[0-9]+}}(%esp), %eax +; X86-NEXT: pushl 264(%ebp) +; X86-NEXT: pushl 260(%ebp) +; X86-NEXT: pushl 256(%ebp) +; X86-NEXT: pushl 252(%ebp) +; X86-NEXT: pushl 248(%ebp) +; X86-NEXT: pushl 244(%ebp) +; X86-NEXT: pushl 240(%ebp) +; X86-NEXT: pushl 236(%ebp) +; X86-NEXT: pushl 232(%ebp) +; X86-NEXT: pushl 228(%ebp) +; X86-NEXT: pushl 224(%ebp) +; X86-NEXT: pushl 220(%ebp) +; X86-NEXT: pushl 216(%ebp) +; X86-NEXT: pushl 212(%ebp) +; X86-NEXT: pushl 208(%ebp) +; X86-NEXT: pushl 204(%ebp) +; X86-NEXT: pushl 200(%ebp) +; X86-NEXT: pushl 196(%ebp) +; X86-NEXT: pushl 192(%ebp) +; X86-NEXT: pushl 188(%ebp) +; X86-NEXT: pushl 184(%ebp) +; X86-NEXT: pushl 180(%ebp) +; X86-NEXT: pushl 176(%ebp) +; X86-NEXT: pushl 172(%ebp) +; X86-NEXT: pushl 168(%ebp) +; X86-NEXT: pushl 164(%ebp) +; X86-NEXT: pushl 160(%ebp) +; X86-NEXT: pushl 156(%ebp) +; X86-NEXT: pushl 152(%ebp) +; X86-NEXT: pushl 148(%ebp) +; X86-NEXT: pushl 144(%ebp) +; X86-NEXT: pushl 140(%ebp) +; X86-NEXT: pushl 136(%ebp) +; X86-NEXT: pushl 132(%ebp) +; X86-NEXT: pushl 128(%ebp) +; X86-NEXT: pushl 124(%ebp) +; X86-NEXT: pushl 120(%ebp) +; X86-NEXT: pushl 116(%ebp) +; X86-NEXT: pushl 112(%ebp) +; X86-NEXT: pushl 108(%ebp) +; X86-NEXT: pushl 104(%ebp) +; X86-NEXT: pushl 100(%ebp) +; X86-NEXT: pushl 96(%ebp) +; X86-NEXT: pushl 92(%ebp) +; X86-NEXT: pushl 88(%ebp) +; X86-NEXT: pushl 84(%ebp) +; X86-NEXT: pushl 80(%ebp) +; X86-NEXT: pushl 76(%ebp) +; X86-NEXT: pushl 72(%ebp) +; X86-NEXT: pushl 68(%ebp) +; X86-NEXT: pushl 64(%ebp) +; X86-NEXT: pushl 60(%ebp) +; X86-NEXT: pushl 56(%ebp) +; X86-NEXT: pushl 52(%ebp) +; X86-NEXT: pushl 48(%ebp) +; X86-NEXT: pushl 44(%ebp) +; X86-NEXT: pushl 40(%ebp) +; X86-NEXT: pushl 36(%ebp) +; X86-NEXT: pushl 32(%ebp) +; X86-NEXT: pushl 28(%ebp) +; X86-NEXT: pushl 24(%ebp) +; X86-NEXT: pushl 20(%ebp) +; X86-NEXT: pushl 16(%ebp) +; X86-NEXT: pushl 12(%ebp) +; X86-NEXT: pushl %eax +; X86-NEXT: calll __llvm_srem1001 +; X86-NEXT: addl $256, %esp # imm = 0x100 +; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %eax, 
{{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx +; X86-NEXT: movl %ebx, 120(%esi) +; X86-NEXT: movl %eax, 116(%esi) +; X86-NEXT: movl %ecx, 112(%esi) +; X86-NEXT: movl %edx, 108(%esi) +; X86-NEXT: movl %edi, 104(%esi) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: movl %eax, 100(%esi) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: movl %eax, 96(%esi) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: movl %eax, 92(%esi) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: movl %eax, 88(%esi) +; X86-NEXT: 
movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: movl %eax, 84(%esi) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: movl %eax, 80(%esi) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: movl %eax, 76(%esi) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: movl %eax, 72(%esi) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: movl %eax, 68(%esi) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: movl %eax, 64(%esi) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: movl %eax, 60(%esi) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: movl %eax, 56(%esi) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: movl %eax, 52(%esi) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: movl %eax, 48(%esi) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: movl %eax, 44(%esi) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: movl %eax, 40(%esi) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: movl %eax, 36(%esi) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: movl %eax, 32(%esi) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: movl %eax, 28(%esi) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: movl %eax, 24(%esi) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: movl %eax, 20(%esi) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: movl %eax, 16(%esi) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: movl %eax, 12(%esi) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: movl %eax, 8(%esi) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: movl %eax, 4(%esi) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: movl %eax, (%esi) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: andl $511, %eax # imm = 0x1FF +; X86-NEXT: movw %ax, 124(%esi) +; X86-NEXT: movl %esi, %eax +; X86-NEXT: leal -12(%ebp), %esp +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: popl %ebx +; X86-NEXT: popl %ebp +; X86-NEXT: retl $4 +; +; X64-LABEL: srem1001: +; X64: # %bb.0: +; X64-NEXT: pushq %rbp +; X64-NEXT: pushq %r15 +; X64-NEXT: pushq %r14 +; X64-NEXT: pushq %r13 +; X64-NEXT: pushq %r12 +; X64-NEXT: pushq %rbx +; X64-NEXT: subq $152, %rsp +; X64-NEXT: movq %rdi, %rbx +; X64-NEXT: subq $8, %rsp +; X64-NEXT: leaq {{[0-9]+}}(%rsp), %rdi +; X64-NEXT: pushq {{[0-9]+}}(%rsp) +; X64-NEXT: pushq {{[0-9]+}}(%rsp) +; X64-NEXT: pushq {{[0-9]+}}(%rsp) +; X64-NEXT: pushq {{[0-9]+}}(%rsp) +; X64-NEXT: pushq {{[0-9]+}}(%rsp) +; X64-NEXT: pushq {{[0-9]+}}(%rsp) +; X64-NEXT: pushq {{[0-9]+}}(%rsp) +; X64-NEXT: pushq {{[0-9]+}}(%rsp) +; X64-NEXT: pushq {{[0-9]+}}(%rsp) +; X64-NEXT: pushq {{[0-9]+}}(%rsp) +; X64-NEXT: pushq {{[0-9]+}}(%rsp) +; X64-NEXT: pushq {{[0-9]+}}(%rsp) +; X64-NEXT: pushq {{[0-9]+}}(%rsp) +; X64-NEXT: pushq {{[0-9]+}}(%rsp) +; X64-NEXT: pushq {{[0-9]+}}(%rsp) +; X64-NEXT: pushq {{[0-9]+}}(%rsp) +; X64-NEXT: pushq {{[0-9]+}}(%rsp) +; X64-NEXT: pushq {{[0-9]+}}(%rsp) +; X64-NEXT: pushq {{[0-9]+}}(%rsp) +; X64-NEXT: pushq {{[0-9]+}}(%rsp) +; X64-NEXT: pushq {{[0-9]+}}(%rsp) +; X64-NEXT: pushq 
{{[0-9]+}}(%rsp) +; X64-NEXT: pushq {{[0-9]+}}(%rsp) +; X64-NEXT: pushq {{[0-9]+}}(%rsp) +; X64-NEXT: pushq {{[0-9]+}}(%rsp) +; X64-NEXT: pushq {{[0-9]+}}(%rsp) +; X64-NEXT: pushq {{[0-9]+}}(%rsp) +; X64-NEXT: callq __llvm_srem1001 +; X64-NEXT: addq $224, %rsp +; X64-NEXT: movl {{[0-9]+}}(%rsp), %eax +; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: movzwl {{[0-9]+}}(%rsp), %edx +; X64-NEXT: movq {{[0-9]+}}(%rsp), %rax +; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: movq {{[0-9]+}}(%rsp), %rax +; X64-NEXT: movq %rax, (%rsp) # 8-byte Spill +; X64-NEXT: movq {{[0-9]+}}(%rsp), %r11 +; X64-NEXT: movq {{[0-9]+}}(%rsp), %r14 +; X64-NEXT: movq {{[0-9]+}}(%rsp), %r15 +; X64-NEXT: movq {{[0-9]+}}(%rsp), %r12 +; X64-NEXT: movq {{[0-9]+}}(%rsp), %r13 +; X64-NEXT: movq {{[0-9]+}}(%rsp), %r10 +; X64-NEXT: movq {{[0-9]+}}(%rsp), %r8 +; X64-NEXT: movq {{[0-9]+}}(%rsp), %rsi +; X64-NEXT: movq {{[0-9]+}}(%rsp), %rdi +; X64-NEXT: movq {{[0-9]+}}(%rsp), %rbp +; X64-NEXT: movq {{[0-9]+}}(%rsp), %rcx +; X64-NEXT: movq {{[0-9]+}}(%rsp), %rax +; X64-NEXT: movq {{[0-9]+}}(%rsp), %r9 +; X64-NEXT: movq %rax, (%rbx) +; X64-NEXT: movq %rcx, 96(%rbx) +; X64-NEXT: movq %rbp, 112(%rbx) +; X64-NEXT: movq %rdi, 104(%rbx) +; X64-NEXT: movq %rsi, 80(%rbx) +; X64-NEXT: movq %r8, 88(%rbx) +; X64-NEXT: movq %r10, 72(%rbx) +; X64-NEXT: movq %r13, 48(%rbx) +; X64-NEXT: movq %r12, 56(%rbx) +; X64-NEXT: movq %r15, 40(%rbx) +; X64-NEXT: movq %r14, 24(%rbx) +; X64-NEXT: movq %r9, 8(%rbx) +; X64-NEXT: movq %r11, 16(%rbx) +; X64-NEXT: movq (%rsp), %rax # 8-byte Reload +; X64-NEXT: movq %rax, 32(%rbx) +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload +; X64-NEXT: movq %rax, 64(%rbx) +; X64-NEXT: andl $511, %edx # imm = 0x1FF +; X64-NEXT: movw %dx, 124(%rbx) +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload +; X64-NEXT: movl %eax, 120(%rbx) +; X64-NEXT: movq %rbx, %rax +; X64-NEXT: addq $152, %rsp +; X64-NEXT: popq %rbx +; X64-NEXT: popq %r12 +; X64-NEXT: popq %r13 +; X64-NEXT: popq %r14 +; X64-NEXT: popq %r15 +; X64-NEXT: popq %rbp +; X64-NEXT: retq + %res = srem i1001 %a, %b + ret i1001 %res +} + +define i129 @chain129(i129 %a, i129 %b) nounwind { +; X86-LABEL: chain129: +; X86: # %bb.0: +; X86-NEXT: pushl %ebp +; X86-NEXT: movl %esp, %ebp +; X86-NEXT: pushl %ebx +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: andl $-8, %esp +; X86-NEXT: subl $48, %esp +; X86-NEXT: movl 8(%ebp), %esi +; X86-NEXT: leal {{[0-9]+}}(%esp), %eax +; X86-NEXT: pushl 48(%ebp) +; X86-NEXT: pushl 44(%ebp) +; X86-NEXT: pushl 40(%ebp) +; X86-NEXT: pushl 36(%ebp) +; X86-NEXT: pushl 32(%ebp) +; X86-NEXT: pushl 28(%ebp) +; X86-NEXT: pushl 24(%ebp) +; X86-NEXT: pushl 20(%ebp) +; X86-NEXT: pushl 16(%ebp) +; X86-NEXT: pushl 12(%ebp) +; X86-NEXT: pushl %eax +; X86-NEXT: calll __llvm_udiv129 +; X86-NEXT: addl $40, %esp +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %esp, %ecx +; X86-NEXT: pushl $0 +; X86-NEXT: pushl $0 +; X86-NEXT: pushl $0 +; X86-NEXT: pushl $0 +; X86-NEXT: pushl $17 +; X86-NEXT: pushl %eax +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl %ecx +; X86-NEXT: calll __llvm_sdiv129 +; X86-NEXT: addl $40, %esp +; X86-NEXT: movl (%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NEXT: movb {{[0-9]+}}(%esp), %bl +; X86-NEXT: movb %bl, 16(%esi) +; 
X86-NEXT: movl %edi, 8(%esi) +; X86-NEXT: movl %edx, 12(%esi) +; X86-NEXT: movl %eax, (%esi) +; X86-NEXT: movl %ecx, 4(%esi) +; X86-NEXT: movl %esi, %eax +; X86-NEXT: leal -12(%ebp), %esp +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: popl %ebx +; X86-NEXT: popl %ebp +; X86-NEXT: retl $4 +; +; X64-LABEL: chain129: +; X64: # %bb.0: +; X64-NEXT: pushq %rax +; X64-NEXT: callq __llvm_udiv129 +; X64-NEXT: movq %rcx, %r8 +; X64-NEXT: movl $17, %ecx +; X64-NEXT: movq %rax, %rdi +; X64-NEXT: movq %rdx, %rsi +; X64-NEXT: movq %r8, %rdx +; X64-NEXT: xorl %r8d, %r8d +; X64-NEXT: xorl %r9d, %r9d +; X64-NEXT: callq __llvm_sdiv129 +; X64-NEXT: popq %rsi +; X64-NEXT: retq + %res = udiv i129 %a, %b + %res2 = sdiv i129 %res, 17 + ret i129 %res2 +} diff --git a/llvm/test/CodeGen/X86/urem-seteq.ll b/llvm/test/CodeGen/X86/urem-seteq.ll --- a/llvm/test/CodeGen/X86/urem-seteq.ll +++ b/llvm/test/CodeGen/X86/urem-seteq.ll @@ -363,20 +363,159 @@ define void @ossfuzz34366() { ; X86-LABEL: ossfuzz34366: ; X86: # %bb.0: +; X86-NEXT: pushl %ebx +; X86-NEXT: .cfi_def_cfa_offset 8 +; X86-NEXT: pushl %edi +; X86-NEXT: .cfi_def_cfa_offset 12 +; X86-NEXT: pushl %esi +; X86-NEXT: .cfi_def_cfa_offset 16 +; X86-NEXT: subl $64, %esp +; X86-NEXT: .cfi_def_cfa_offset 80 +; X86-NEXT: .cfi_offset %esi, -16 +; X86-NEXT: .cfi_offset %edi, -12 +; X86-NEXT: .cfi_offset %ebx, -8 ; X86-NEXT: movl (%eax), %eax -; X86-NEXT: movl %eax, %ecx -; X86-NEXT: andl $2147483647, %ecx # imm = 0x7FFFFFFF -; X86-NEXT: orl %eax, %ecx +; X86-NEXT: subl $12, %esp +; X86-NEXT: .cfi_adjust_cfa_offset 12 +; X86-NEXT: leal {{[0-9]+}}(%esp), %ecx +; X86-NEXT: pushl $-2147483648 # imm = 0x80000000 +; X86-NEXT: .cfi_adjust_cfa_offset 4 +; X86-NEXT: pushl $0 +; X86-NEXT: .cfi_adjust_cfa_offset 4 +; X86-NEXT: pushl $0 +; X86-NEXT: .cfi_adjust_cfa_offset 4 +; X86-NEXT: pushl $0 +; X86-NEXT: .cfi_adjust_cfa_offset 4 +; X86-NEXT: pushl $0 +; X86-NEXT: .cfi_adjust_cfa_offset 4 +; X86-NEXT: pushl $0 +; X86-NEXT: .cfi_adjust_cfa_offset 4 +; X86-NEXT: pushl $0 +; X86-NEXT: .cfi_adjust_cfa_offset 4 +; X86-NEXT: pushl $0 +; X86-NEXT: .cfi_adjust_cfa_offset 4 +; X86-NEXT: pushl $0 +; X86-NEXT: .cfi_adjust_cfa_offset 4 +; X86-NEXT: pushl $0 +; X86-NEXT: .cfi_adjust_cfa_offset 4 +; X86-NEXT: pushl $0 +; X86-NEXT: .cfi_adjust_cfa_offset 4 +; X86-NEXT: pushl $0 +; X86-NEXT: .cfi_adjust_cfa_offset 4 +; X86-NEXT: pushl $0 +; X86-NEXT: .cfi_adjust_cfa_offset 4 +; X86-NEXT: pushl $0 +; X86-NEXT: .cfi_adjust_cfa_offset 4 +; X86-NEXT: pushl %eax +; X86-NEXT: .cfi_adjust_cfa_offset 4 +; X86-NEXT: pushl %eax +; X86-NEXT: .cfi_adjust_cfa_offset 4 +; X86-NEXT: pushl %eax +; X86-NEXT: .cfi_adjust_cfa_offset 4 +; X86-NEXT: pushl %eax +; X86-NEXT: .cfi_adjust_cfa_offset 4 +; X86-NEXT: pushl %eax +; X86-NEXT: .cfi_adjust_cfa_offset 4 +; X86-NEXT: pushl %eax +; X86-NEXT: .cfi_adjust_cfa_offset 4 +; X86-NEXT: pushl %eax +; X86-NEXT: .cfi_adjust_cfa_offset 4 +; X86-NEXT: pushl %eax +; X86-NEXT: .cfi_adjust_cfa_offset 4 +; X86-NEXT: pushl %eax +; X86-NEXT: .cfi_adjust_cfa_offset 4 +; X86-NEXT: pushl %eax +; X86-NEXT: .cfi_adjust_cfa_offset 4 +; X86-NEXT: pushl %eax +; X86-NEXT: .cfi_adjust_cfa_offset 4 +; X86-NEXT: pushl %eax +; X86-NEXT: .cfi_adjust_cfa_offset 4 +; X86-NEXT: pushl %eax +; X86-NEXT: .cfi_adjust_cfa_offset 4 +; X86-NEXT: pushl %eax +; X86-NEXT: .cfi_adjust_cfa_offset 4 +; X86-NEXT: pushl %ecx +; X86-NEXT: .cfi_adjust_cfa_offset 4 +; X86-NEXT: calll __llvm_urem448 +; X86-NEXT: .cfi_adjust_cfa_offset -4 +; X86-NEXT: addl $124, %esp +; X86-NEXT: .cfi_adjust_cfa_offset 
-124 +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx +; X86-NEXT: orl {{[0-9]+}}(%esp), %ebx +; X86-NEXT: orl {{[0-9]+}}(%esp), %edi +; X86-NEXT: orl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: orl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: orl %ebx, %ecx +; X86-NEXT: orl %edi, %ecx +; X86-NEXT: orl {{[0-9]+}}(%esp), %edx +; X86-NEXT: orl {{[0-9]+}}(%esp), %esi +; X86-NEXT: orl {{[0-9]+}}(%esp), %eax +; X86-NEXT: orl {{[0-9]+}}(%esp), %eax +; X86-NEXT: orl %edx, %eax +; X86-NEXT: orl %esi, %eax +; X86-NEXT: orl %ecx, %eax ; X86-NEXT: sete (%eax) +; X86-NEXT: addl $64, %esp +; X86-NEXT: .cfi_def_cfa_offset 16 +; X86-NEXT: popl %esi +; X86-NEXT: .cfi_def_cfa_offset 12 +; X86-NEXT: popl %edi +; X86-NEXT: .cfi_def_cfa_offset 8 +; X86-NEXT: popl %ebx +; X86-NEXT: .cfi_def_cfa_offset 4 ; X86-NEXT: retl ; ; X64-LABEL: ossfuzz34366: ; X64: # %bb.0: -; X64-NEXT: movq (%rax), %rax -; X64-NEXT: movabsq $9223372036854775807, %rcx # imm = 0x7FFFFFFFFFFFFFFF -; X64-NEXT: andq %rax, %rcx +; X64-NEXT: subq $56, %rsp +; X64-NEXT: .cfi_def_cfa_offset 64 +; X64-NEXT: movq (%rax), %rcx +; X64-NEXT: subq $8, %rsp +; X64-NEXT: .cfi_adjust_cfa_offset 8 +; X64-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000 +; X64-NEXT: leaq {{[0-9]+}}(%rsp), %rdi +; X64-NEXT: movq %rcx, %rsi +; X64-NEXT: movq %rcx, %rdx +; X64-NEXT: movq %rcx, %r8 +; X64-NEXT: movq %rcx, %r9 +; X64-NEXT: pushq %rax +; X64-NEXT: .cfi_adjust_cfa_offset 8 +; X64-NEXT: pushq $0 +; X64-NEXT: .cfi_adjust_cfa_offset 8 +; X64-NEXT: pushq $0 +; X64-NEXT: .cfi_adjust_cfa_offset 8 +; X64-NEXT: pushq $0 +; X64-NEXT: .cfi_adjust_cfa_offset 8 +; X64-NEXT: pushq $0 +; X64-NEXT: .cfi_adjust_cfa_offset 8 +; X64-NEXT: pushq $0 +; X64-NEXT: .cfi_adjust_cfa_offset 8 +; X64-NEXT: pushq $0 +; X64-NEXT: .cfi_adjust_cfa_offset 8 +; X64-NEXT: pushq %rcx +; X64-NEXT: .cfi_adjust_cfa_offset 8 +; X64-NEXT: pushq %rcx +; X64-NEXT: .cfi_adjust_cfa_offset 8 +; X64-NEXT: callq __llvm_urem448 +; X64-NEXT: addq $80, %rsp +; X64-NEXT: .cfi_adjust_cfa_offset -80 +; X64-NEXT: movq (%rsp), %rax +; X64-NEXT: movq {{[0-9]+}}(%rsp), %rcx +; X64-NEXT: movq {{[0-9]+}}(%rsp), %rdx +; X64-NEXT: orq {{[0-9]+}}(%rsp), %rdx +; X64-NEXT: orq {{[0-9]+}}(%rsp), %rax +; X64-NEXT: orq %rdx, %rax +; X64-NEXT: orq {{[0-9]+}}(%rsp), %rcx +; X64-NEXT: orq {{[0-9]+}}(%rsp), %rcx ; X64-NEXT: orq %rax, %rcx ; X64-NEXT: sete (%rax) +; X64-NEXT: addq $56, %rsp +; X64-NEXT: .cfi_def_cfa_offset 8 ; X64-NEXT: retq %L10 = load i448, i448* undef, align 4 %B18 = urem i448 %L10, -363419362147803445274661903944002267176820680343659030140745099590319644056698961663095525356881782780381260803133088966767300814307328 diff --git a/llvm/test/Transforms/ExpandLargeDivRem/sdiv129.ll b/llvm/test/Transforms/ExpandLargeDivRem/sdiv129.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/ExpandLargeDivRem/sdiv129.ll @@ -0,0 +1,61 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --include-generated-funcs +; RUN: opt -S -expandlargedivrem < %s | FileCheck %s + +define void @sdiv129(i129* %ptr, i129* %out) nounwind { + %a = load i129, i129* %ptr + %res = sdiv i129 %a, 3 + store i129 %res, i129* %out + ret void +} + +; CHECK-LABEL: @sdiv129( +; CHECK-NEXT: [[A:%.*]] = load i129, i129* [[PTR:%.*]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = call i129 @__llvm_sdiv129(i129 [[A]], i129 3) 
+; CHECK-NEXT: store i129 [[TMP1]], i129* [[OUT:%.*]], align 4 +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: define internal i129 @__llvm_sdiv129(i129 %0, i129 %1) +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP2:%.*]] = icmp slt i129 [[TMP0:%.*]], 0 +; CHECK-NEXT: [[TMP3:%.*]] = sub i129 0, [[TMP0]] +; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TMP2]], i129 [[TMP3]], i129 [[TMP0]] +; CHECK-NEXT: [[TMP5:%.*]] = icmp slt i129 [[TMP1:%.*]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = sub i129 0, [[TMP1]] +; CHECK-NEXT: [[TMP7:%.*]] = select i1 [[TMP5]], i129 [[TMP6]], i129 [[TMP1]] +; CHECK-NEXT: [[TMP8:%.*]] = call i129 @__llvm_udiv129(i129 [[TMP4]], i129 [[TMP7]]) +; CHECK-NEXT: [[TMP9:%.*]] = icmp ne i1 [[TMP2]], [[TMP5]] +; CHECK-NEXT: [[TMP10:%.*]] = sub i129 0, [[TMP8]] +; CHECK-NEXT: [[TMP11:%.*]] = select i1 [[TMP9]], i129 [[TMP10]], i129 [[TMP8]] +; CHECK-NEXT: ret i129 [[TMP11]] +; +; +; CHECK-LABEL: define internal i129 @__llvm_udiv129(i129 %0, i129 %1) +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[I:%.*]] = phi i32 [ 128, [[ENTRY:%.*]] ], [ [[NEW_I:%.*]], [[ELSE:%.*]] ] +; CHECK-NEXT: [[Q:%.*]] = phi i129 [ 0, [[ENTRY]] ], [ [[R4:%.*]], [[ELSE]] ] +; CHECK-NEXT: [[R:%.*]] = phi i129 [ 0, [[ENTRY]] ], [ [[R3:%.*]], [[ELSE]] ] +; CHECK-NEXT: [[IEXT:%.*]] = zext i32 [[I]] to i129 +; CHECK-NEXT: [[TMP2:%.*]] = lshr i129 [[TMP0:%.*]], [[IEXT]] +; CHECK-NEXT: [[TMP3:%.*]] = trunc i129 [[TMP2]] to i1 +; CHECK-NEXT: [[NEW_R:%.*]] = shl i129 [[R]], 1 +; CHECK-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i129 +; CHECK-NEXT: [[NEW_R1:%.*]] = or i129 [[NEW_R]], [[TMP4]] +; CHECK-NEXT: [[LOOP_EXIT_COND:%.*]] = icmp eq i32 [[I]], 0 +; CHECK-NEXT: [[NEW_I]] = add i32 [[I]], -1 +; CHECK-NEXT: [[TMP5:%.*]] = icmp uge i129 [[NEW_R1]], [[TMP1:%.*]] +; CHECK-NEXT: br i1 [[TMP5]], label [[THEN:%.*]], label [[ELSE]] +; CHECK: then: +; CHECK-NEXT: [[NEW_R2:%.*]] = sub i129 [[NEW_R1]], [[TMP1]] +; CHECK-NEXT: [[TMP6:%.*]] = shl i129 1, [[IEXT]] +; CHECK-NEXT: [[NEW_Q:%.*]] = or i129 [[Q]], [[TMP6]] +; CHECK-NEXT: br label [[ELSE]] +; CHECK: if.end: +; CHECK-NEXT: [[R3]] = phi i129 [ [[NEW_R2]], [[THEN]] ], [ [[NEW_R1]], [[LOOP]] ] +; CHECK-NEXT: [[R4]] = phi i129 [ [[NEW_Q]], [[THEN]] ], [ [[Q]], [[LOOP]] ] +; CHECK-NEXT: br i1 [[LOOP_EXIT_COND]], label [[EXIT:%.*]], label [[LOOP]] +; CHECK: exit: +; CHECK-NEXT: ret i129 [[R4]] +; diff --git a/llvm/test/Transforms/ExpandLargeDivRem/srem129.ll b/llvm/test/Transforms/ExpandLargeDivRem/srem129.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/ExpandLargeDivRem/srem129.ll @@ -0,0 +1,55 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --include-generated-funcs +; RUN: opt -S -expandlargedivrem < %s | FileCheck %s + +define void @test(i129* %ptr, i129* %out) nounwind { + %a = load i129, i129* %ptr + %res = srem i129 %a, 3 + store i129 %res, i129* %out + ret void +} +; CHECK-LABEL: @test( +; CHECK-NEXT: [[A:%.*]] = load i129, i129* [[PTR:%.*]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = call i129 @__llvm_srem129(i129 [[A]], i129 3) +; CHECK-NEXT: store i129 [[TMP1]], i129* [[OUT:%.*]], align 4 +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: define internal i129 @__llvm_srem129(i129 %0, i129 %1) +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP2:%.*]] = icmp slt i129 [[TMP0:%.*]], 0 +; CHECK-NEXT: [[TMP3:%.*]] = sub i129 0, [[TMP0]] +; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TMP2]], i129 [[TMP3]], i129 [[TMP0]] +; CHECK-NEXT: [[TMP5:%.*]] = icmp slt i129 [[TMP1:%.*]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = 
sub i129 0, [[TMP1]] +; CHECK-NEXT: [[TMP7:%.*]] = select i1 [[TMP5]], i129 [[TMP6]], i129 [[TMP1]] +; CHECK-NEXT: [[TMP8:%.*]] = call i129 @__llvm_urem129(i129 [[TMP4]], i129 [[TMP7]]) +; CHECK-NEXT: [[TMP9:%.*]] = sub i129 0, [[TMP8]] +; CHECK-NEXT: [[TMP10:%.*]] = select i1 [[TMP2]], i129 [[TMP9]], i129 [[TMP8]] +; CHECK-NEXT: ret i129 [[TMP10]] +; +; +; CHECK-LABEL: define internal i129 @__llvm_urem129(i129 %0, i129 %1) +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[I:%.*]] = phi i32 [ 128, [[ENTRY:%.*]] ], [ [[NEW_I:%.*]], [[ELSE:%.*]] ] +; CHECK-NEXT: [[R:%.*]] = phi i129 [ 0, [[ENTRY]] ], [ [[R3:%.*]], [[ELSE]] ] +; CHECK-NEXT: [[IEXT:%.*]] = zext i32 [[I]] to i129 +; CHECK-NEXT: [[TMP2:%.*]] = lshr i129 [[TMP0:%.*]], [[IEXT]] +; CHECK-NEXT: [[TMP3:%.*]] = trunc i129 [[TMP2]] to i1 +; CHECK-NEXT: [[NEW_R:%.*]] = shl i129 [[R]], 1 +; CHECK-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i129 +; CHECK-NEXT: [[NEW_R1:%.*]] = or i129 [[NEW_R]], [[TMP4]] +; CHECK-NEXT: [[LOOP_EXIT_COND:%.*]] = icmp eq i32 [[I]], 0 +; CHECK-NEXT: [[NEW_I]] = add i32 [[I]], -1 +; CHECK-NEXT: [[TMP5:%.*]] = icmp uge i129 [[NEW_R1]], [[TMP1:%.*]] +; CHECK-NEXT: br i1 [[TMP5]], label [[THEN:%.*]], label [[ELSE]] +; CHECK: then: +; CHECK-NEXT: [[NEW_R2:%.*]] = sub i129 [[NEW_R1]], [[TMP1]] +; CHECK-NEXT: br label [[ELSE]] +; CHECK: if.end: +; CHECK-NEXT: [[R3]] = phi i129 [ [[NEW_R2]], [[THEN]] ], [ [[NEW_R1]], [[LOOP]] ] +; CHECK-NEXT: br i1 [[LOOP_EXIT_COND]], label [[EXIT:%.*]], label [[LOOP]] +; CHECK: exit: +; CHECK-NEXT: ret i129 [[R3]] +; diff --git a/llvm/test/Transforms/ExpandLargeDivRem/udiv129.ll b/llvm/test/Transforms/ExpandLargeDivRem/udiv129.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/ExpandLargeDivRem/udiv129.ll @@ -0,0 +1,46 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --include-generated-funcs +; RUN: opt -S -expandlargedivrem < %s | FileCheck %s + +define void @test(i129* %ptr, i129* %out) nounwind { + %a = load i129, i129* %ptr + %res = udiv i129 %a, 3 + store i129 %res, i129* %out + ret void +} + +; CHECK-LABEL: @test( +; CHECK-NEXT: [[A:%.*]] = load i129, i129* [[PTR:%.*]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = call i129 @__llvm_udiv129(i129 [[A]], i129 3) +; CHECK-NEXT: store i129 [[TMP1]], i129* [[OUT:%.*]], align 4 +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: define internal i129 @__llvm_udiv129(i129 %0, i129 %1) +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[I:%.*]] = phi i32 [ 128, [[ENTRY:%.*]] ], [ [[NEW_I:%.*]], [[ELSE:%.*]] ] +; CHECK-NEXT: [[Q:%.*]] = phi i129 [ 0, [[ENTRY]] ], [ [[R4:%.*]], [[ELSE]] ] +; CHECK-NEXT: [[R:%.*]] = phi i129 [ 0, [[ENTRY]] ], [ [[R3:%.*]], [[ELSE]] ] +; CHECK-NEXT: [[IEXT:%.*]] = zext i32 [[I]] to i129 +; CHECK-NEXT: [[TMP2:%.*]] = lshr i129 [[TMP0:%.*]], [[IEXT]] +; CHECK-NEXT: [[TMP3:%.*]] = trunc i129 [[TMP2]] to i1 +; CHECK-NEXT: [[NEW_R:%.*]] = shl i129 [[R]], 1 +; CHECK-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i129 +; CHECK-NEXT: [[NEW_R1:%.*]] = or i129 [[NEW_R]], [[TMP4]] +; CHECK-NEXT: [[LOOP_EXIT_COND:%.*]] = icmp eq i32 [[I]], 0 +; CHECK-NEXT: [[NEW_I]] = add i32 [[I]], -1 +; CHECK-NEXT: [[TMP5:%.*]] = icmp uge i129 [[NEW_R1]], [[TMP1:%.*]] +; CHECK-NEXT: br i1 [[TMP5]], label [[THEN:%.*]], label [[ELSE]] +; CHECK: then: +; CHECK-NEXT: [[NEW_R2:%.*]] = sub i129 [[NEW_R1]], [[TMP1]] +; CHECK-NEXT: [[TMP6:%.*]] = shl i129 1, [[IEXT]] +; CHECK-NEXT: [[NEW_Q:%.*]] = or i129 [[Q]], [[TMP6]] +; 
CHECK-NEXT: br label [[ELSE]] +; CHECK: if.end: +; CHECK-NEXT: [[R3]] = phi i129 [ [[NEW_R2]], [[THEN]] ], [ [[NEW_R1]], [[LOOP]] ] +; CHECK-NEXT: [[R4]] = phi i129 [ [[NEW_Q]], [[THEN]] ], [ [[Q]], [[LOOP]] ] +; CHECK-NEXT: br i1 [[LOOP_EXIT_COND]], label [[EXIT:%.*]], label [[LOOP]] +; CHECK: exit: +; CHECK-NEXT: ret i129 [[R4]] +; diff --git a/llvm/test/Transforms/ExpandLargeDivRem/urem129.ll b/llvm/test/Transforms/ExpandLargeDivRem/urem129.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/ExpandLargeDivRem/urem129.ll @@ -0,0 +1,42 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --include-generated-funcs +; RUN: opt -S -expandlargedivrem < %s | FileCheck %s + +define void @test(i129* %ptr, i129* %out) nounwind { + %a = load i129, i129* %ptr + %res = urem i129 %a, 3 + store i129 %res, i129* %out + ret void +} + +; CHECK-LABEL: @test( +; CHECK-NEXT: [[A:%.*]] = load i129, i129* [[PTR:%.*]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = call i129 @__llvm_urem129(i129 [[A]], i129 3) +; CHECK-NEXT: store i129 [[TMP1]], i129* [[OUT:%.*]], align 4 +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: define internal i129 @__llvm_urem129(i129 %0, i129 %1) +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[I:%.*]] = phi i32 [ 128, [[ENTRY:%.*]] ], [ [[NEW_I:%.*]], [[ELSE:%.*]] ] +; CHECK-NEXT: [[R:%.*]] = phi i129 [ 0, [[ENTRY]] ], [ [[R3:%.*]], [[ELSE]] ] +; CHECK-NEXT: [[IEXT:%.*]] = zext i32 [[I]] to i129 +; CHECK-NEXT: [[TMP2:%.*]] = lshr i129 [[TMP0:%.*]], [[IEXT]] +; CHECK-NEXT: [[TMP3:%.*]] = trunc i129 [[TMP2]] to i1 +; CHECK-NEXT: [[NEW_R:%.*]] = shl i129 [[R]], 1 +; CHECK-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i129 +; CHECK-NEXT: [[NEW_R1:%.*]] = or i129 [[NEW_R]], [[TMP4]] +; CHECK-NEXT: [[LOOP_EXIT_COND:%.*]] = icmp eq i32 [[I]], 0 +; CHECK-NEXT: [[NEW_I]] = add i32 [[I]], -1 +; CHECK-NEXT: [[TMP5:%.*]] = icmp uge i129 [[NEW_R1]], [[TMP1:%.*]] +; CHECK-NEXT: br i1 [[TMP5]], label [[THEN:%.*]], label [[ELSE]] +; CHECK: then: +; CHECK-NEXT: [[NEW_R2:%.*]] = sub i129 [[NEW_R1]], [[TMP1]] +; CHECK-NEXT: br label [[ELSE]] +; CHECK: if.end: +; CHECK-NEXT: [[R3]] = phi i129 [ [[NEW_R2]], [[THEN]] ], [ [[NEW_R1]], [[LOOP]] ] +; CHECK-NEXT: br i1 [[LOOP_EXIT_COND]], label [[EXIT:%.*]], label [[LOOP]] +; CHECK: exit: +; CHECK-NEXT: ret i129 [[R3]] +; diff --git a/llvm/test/Transforms/ExpandLargeDivRem/values129.ll b/llvm/test/Transforms/ExpandLargeDivRem/values129.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/ExpandLargeDivRem/values129.ll @@ -0,0 +1,189 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py + +; This test checks that constant propagation done on the div/rem instruction +; (via -O2) gives the same result as running the expandlargedivrem pass and +; then constant-folding the expanded code. 
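+; In other words, both RUN lines below must fold every call to the same +; constants; the second pipeline relies on -unroll-count=129 and a large +; inline threshold so that the expanded 129-iteration division loop can be +; fully evaluated at compile time.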
+; RUN: opt -S -O2 < %s | FileCheck %s
+; RUN: opt -S -expandlargedivrem < %s | opt -unroll-count=129 -inline-threshold=100000 -O2 -S | FileCheck %s
+
+define {i129, i129} @test1() {
+; CHECK-LABEL: @test1(
+; CHECK-NEXT: ret { i129, i129 } zeroinitializer
+;
+  %ret = call {i129, i129} @udivrem(i129 0, i129 1)
+  ret {i129, i129} %ret
+}
+
+define {i129, i129} @test2() {
+; CHECK-LABEL: @test2(
+; CHECK-NEXT: ret { i129, i129 } { i129 1, i129 0 }
+;
+  %ret = call {i129, i129} @udivrem(i129 1, i129 1)
+  ret {i129, i129} %ret
+}
+
+define {i129, i129} @test3() {
+; CHECK-LABEL: @test3(
+; CHECK-NEXT: ret { i129, i129 } { i129 2, i129 0 }
+;
+  %ret = call {i129, i129} @udivrem(i129 2, i129 1)
+  ret {i129, i129} %ret
+}
+
+define {i129, i129} @test4() {
+; CHECK-LABEL: @test4(
+; CHECK-NEXT: ret { i129, i129 } { i129 1, i129 2 }
+;
+  %ret = call {i129, i129} @udivrem(i129 7, i129 5)
+  ret {i129, i129} %ret
+}
+
+define {i129, i129} @test5() {
+; CHECK-LABEL: @test5(
+; CHECK-NEXT: ret { i129, i129 } { i129 3, i129 4 }
+;
+  %ret = call {i129, i129} @udivrem(i129 19, i129 5)
+  ret {i129, i129} %ret
+}
+
+define {i129, i129} @test6() {
+; CHECK-LABEL: @test6(
+; CHECK-NEXT: ret { i129, i129 } { i129 340282366920938463463374607431768211455, i129 1 }
+;
+  %all_bits_set = sub i129 0, 1
+  %ret = call {i129, i129} @udivrem(i129 %all_bits_set, i129 2)
+  ret {i129, i129} %ret
+}
+
+; Signed tests start here
+
+define {i129, i129} @stest1() {
+; CHECK-LABEL: @stest1(
+; CHECK-NEXT: ret { i129, i129 } zeroinitializer
+;
+  %ret = call {i129, i129} @sdivrem(i129 0, i129 1)
+  ret {i129, i129} %ret
+}
+
+define {i129, i129} @stest2() {
+; CHECK-LABEL: @stest2(
+; CHECK-NEXT: ret { i129, i129 } zeroinitializer
+;
+  %ret = call {i129, i129} @sdivrem(i129 0, i129 -1)
+  ret {i129, i129} %ret
+}
+
+define {i129, i129} @stest3() {
+; CHECK-LABEL: @stest3(
+; CHECK-NEXT: ret { i129, i129 } { i129 1, i129 0 }
+;
+  %ret = call {i129, i129} @sdivrem(i129 1, i129 1)
+  ret {i129, i129} %ret
+}
+
+define {i129, i129} @stest4() {
+; CHECK-LABEL: @stest4(
+; CHECK-NEXT: ret { i129, i129 } { i129 -1, i129 0 }
+;
+  %ret = call {i129, i129} @sdivrem(i129 1, i129 -1)
+  ret {i129, i129} %ret
+}
+
+define {i129, i129} @stest5() {
+; CHECK-LABEL: @stest5(
+; CHECK-NEXT: ret { i129, i129 } { i129 2, i129 0 }
+;
+  %ret = call {i129, i129} @sdivrem(i129 2, i129 1)
+  ret {i129, i129} %ret
+}
+
+define {i129, i129} @stest6() {
+; CHECK-LABEL: @stest6(
+; CHECK-NEXT: ret { i129, i129 } { i129 -2, i129 0 }
+;
+  %ret = call {i129, i129} @sdivrem(i129 2, i129 -1)
+  ret {i129, i129} %ret
+}
+
+define {i129, i129} @stest7() {
+; CHECK-LABEL: @stest7(
+; CHECK-NEXT: ret { i129, i129 } { i129 -2, i129 0 }
+;
+  %ret = call {i129, i129} @sdivrem(i129 -2, i129 1)
+  ret {i129, i129} %ret
+}
+
+define {i129, i129} @stest8() {
+; CHECK-LABEL: @stest8(
+; CHECK-NEXT: ret { i129, i129 } { i129 2, i129 0 }
+;
+  %ret = call {i129, i129} @sdivrem(i129 -2, i129 -1)
+  ret {i129, i129} %ret
+}
+
+define {i129, i129} @stest9() {
+; CHECK-LABEL: @stest9(
+; CHECK-NEXT: ret { i129, i129 } { i129 1, i129 2 }
+;
+  %ret = call {i129, i129} @sdivrem(i129 7, i129 5)
+  ret {i129, i129} %ret
+}
+
+define {i129, i129} @stest10() {
+; CHECK-LABEL: @stest10(
+; CHECK-NEXT: ret { i129, i129 } { i129 -1, i129 -2 }
+;
+  %ret = call {i129, i129} @sdivrem(i129 -7, i129 5)
+  ret {i129, i129} %ret
+}
+
+define {i129, i129} @stest11() {
+; CHECK-LABEL: @stest11(
+; CHECK-NEXT: ret { i129, i129 } { i129 3, i129 4 }
+;
+  %ret = call {i129, i129} @sdivrem(i129 19, i129 5)
+  ret {i129, i129} %ret
+}
+
+define {i129, i129} @stest12() {
+; CHECK-LABEL: @stest12(
+; CHECK-NEXT: ret { i129, i129 } { i129 -3, i129 4 }
+;
+  %ret = call {i129, i129} @sdivrem(i129 19, i129 -5)
+  ret {i129, i129} %ret
+}
+
+define {i129, i129} @stest13() {
+; CHECK-LABEL: @stest13(
+; CHECK-NEXT: ret { i129, i129 } { i129 -340282366920938463463374607431768211455, i129 0 }
+;
+  %min_int = shl i129 1, 128
+  %max_int = sub i129 %min_int, 1
+  %ret = call {i129, i129} @sdivrem(i129 %max_int, i129 -1)
+  ret {i129, i129} %ret
+}
+
+define internal {i129, i129} @udivrem(i129 %a, i129 %b) {
+  %q = udiv i129 %a, %b
+  %r = urem i129 %a, %b
+
+  %agg1 = insertvalue {i129, i129} undef, i129 %q, 0
+  %agg2 = insertvalue {i129, i129} %agg1, i129 %r, 1
+
+  ret {i129, i129} %agg2
+}
+
+define internal {i129, i129} @sdivrem(i129 %a, i129 %b) {
+  %q = sdiv i129 %a, %b
+  %r = srem i129 %a, %b
+
+  %agg1 = insertvalue {i129, i129} undef, i129 %q, 0
+  %agg2 = insertvalue {i129, i129} %agg1, i129 %r, 1
+
+  ret {i129, i129} %agg2
+}
+
diff --git a/llvm/tools/opt/opt.cpp b/llvm/tools/opt/opt.cpp
--- a/llvm/tools/opt/opt.cpp
+++ b/llvm/tools/opt/opt.cpp
@@ -494,7 +494,7 @@
       "replace-with-veclib", "jmc-instrument",
       "dot-regions", "dot-regions-only",
       "view-regions", "view-regions-only",
-      "select-optimize"};
+      "select-optimize", "expandlargedivrem"};
   for (const auto &P : PassNamePrefix)
     if (Pass.startswith(P))
       return true;
@@ -543,6 +543,7 @@
   initializeTarget(Registry);
   // For codegen passes, only passes that do IR to IR transformation are
   // supported.
+  initializeExpandLargeDivRemLegacyPassPass(Registry);
   initializeExpandMemCmpPassPass(Registry);
   initializeScalarizeMaskedMemIntrinLegacyPassPass(Registry);
   initializeSelectOptimizePass(Registry);
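
Note on the generated helpers: the @__llvm_udiv129 body checked in the tests
above is a classic restoring (shift-subtract) division loop, one iteration per
bit from 128 down to 0, and the signed variants reduce to it by taking
absolute values first and patching the result's sign with selects (see the
sdiv/srem checks earlier in this diff). As a reading aid only, here is a
minimal C++ sketch of the same loop; the 32-bit width and the name udivrem32
are illustrative choices, not part of the patch, which emits the loop at
arbitrary widths such as i129.

  #include <cstdint>

  // Restoring shift-subtract division, mirroring the expanded
  // @__llvm_udiv129/@__llvm_urem129 bodies, shown at 32 bits.
  // Assumes B != 0, matching the IR, where udiv/urem by zero is undefined.
  static void udivrem32(uint32_t A, uint32_t B, uint32_t &Q, uint32_t &R) {
    Q = 0;
    R = 0;
    for (int I = 31; I >= 0; --I) {   // [[I]] counts 128..0 in the i129 case
      R = (R << 1) | ((A >> I) & 1);  // NEW_R1: shift in dividend bit I
      if (R >= B) {                   // icmp uge NEW_R1, %1
        R -= B;                       // NEW_R2: remainder update
        Q |= uint32_t(1) << I;        // NEW_Q: set quotient bit I
      }
    }
    // The udiv helper returns Q (ret [[R4]]); the urem helper returns R
    // (ret [[R3]]).
  }

For example, udivrem32(7, 5, Q, R) leaves Q == 1 and R == 2, matching the
constant-folded result checked in @test4 of values129.ll.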